compressed-tensors 0.12.3a20251009__tar.gz → 0.12.3a20251010__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/test.yml +1 -1
  2. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/trigger-all.yml +1 -1
  3. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/PKG-INFO +1 -1
  4. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/base.py +0 -3
  5. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/initialize.py +1 -1
  6. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_args.py +7 -0
  7. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/version.py +1 -1
  8. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
  9. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/conftest.py +0 -21
  10. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_initialize.py +0 -7
  11. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_configs/test_strategies.py +0 -31
  12. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_utils/test_helpers.py +0 -1
  13. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/.gitkeep +0 -0
  14. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/actions/test/action.yml +0 -0
  15. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/scripts/step-status +0 -0
  16. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/build-test.yml +0 -0
  17. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/build.yml +0 -0
  18. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/post-release-nightly-build.yml +0 -0
  19. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/quality-check.yaml +0 -0
  20. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/report.yml +0 -0
  21. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/test-check.yaml +0 -0
  22. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.github/workflows/upload.yml +0 -0
  23. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/.gitignore +0 -0
  24. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/LICENSE +0 -0
  25. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/Makefile +0 -0
  26. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/README.md +0 -0
  27. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  28. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/bit_packing/int4_config.json +0 -0
  29. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/bitmask_compression.ipynb +0 -0
  30. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  31. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  32. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/example_quant_config.json +0 -0
  33. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  34. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/examples/quantize_and_pack_int4.ipynb +0 -0
  35. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/pyproject.toml +0 -0
  36. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/setup.cfg +0 -0
  37. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/setup.py +0 -0
  38. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/__init__.py +0 -0
  39. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/README.md +0 -0
  40. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/__init__.py +0 -0
  41. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/__init__.py +0 -0
  42. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/base.py +0 -0
  43. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/helpers.py +0 -0
  44. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  46. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  47. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  48. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  49. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  50. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  51. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  52. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  53. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  54. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  55. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  56. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  57. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  58. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/__init__.py +0 -0
  59. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/base.py +0 -0
  60. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/dense.py +0 -0
  61. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/format.py +0 -0
  62. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  63. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  64. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/linear/__init__.py +0 -0
  65. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  66. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/logger.py +0 -0
  67. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/__init__.py +0 -0
  68. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  69. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  70. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  71. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  72. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  73. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_config.py +0 -0
  74. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  75. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  76. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  77. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  78. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/registry/__init__.py +0 -0
  79. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/registry/registry.py +0 -0
  80. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/__init__.py +0 -0
  81. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/apply.py +0 -0
  82. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  83. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/base.py +0 -0
  84. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  85. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  86. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  87. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/transform_args.py +0 -0
  88. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/transform_config.py +0 -0
  89. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  90. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  91. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  92. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  93. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  94. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/__init__.py +0 -0
  95. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/helpers.py +0 -0
  96. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/internal.py +0 -0
  97. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/match.py +0 -0
  98. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/offload.py +0 -0
  99. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/permutations_24.py +0 -0
  100. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  101. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  102. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/type.py +0 -0
  103. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  104. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  105. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/requires.txt +0 -0
  106. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  107. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/__init__.py +0 -0
  108. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/mock_observer.py +0 -0
  109. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/__init__.py +0 -0
  110. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/model_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  112. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  113. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  114. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  115. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  116. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  117. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  118. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  119. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  120. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  121. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  122. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_configs/__init__.py +0 -0
  123. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_configs/test_base.py +0 -0
  124. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_configs/test_infer_quant.py +0 -0
  125. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  126. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_linear/__init__.py +0 -0
  127. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_linear/test_compressed_linear.py +0 -0
  128. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/__init__.py +0 -0
  129. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/__init__.py +0 -0
  130. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/conftest.py +0 -0
  131. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  132. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  133. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  134. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  135. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  136. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  137. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_configs/__init__.py +0 -0
  138. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  139. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_quant_args.py +0 -0
  140. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_quant_config.py +0 -0
  141. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_quant_scheme.py +0 -0
  142. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_registry.py +0 -0
  143. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/conftest.py +0 -0
  144. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/factory/test_correctness.py +0 -0
  145. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/factory/test_memory.py +0 -0
  146. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/factory/test_serialization.py +0 -0
  147. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/test_transform_args.py +0 -0
  148. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/test_transform_config.py +0 -0
  149. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/test_transform_scheme.py +0 -0
  150. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_transform/utils/test_hadamard.py +0 -0
  151. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_utils/__init__.py +0 -0
  152. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_helpers.py +0 -0
  153. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_match.py +0 -0
  154. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_offload.py +0 -0
  155. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_safetensors_load.py +0 -0
  156. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_type.py +0 -0
  157. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/tests/testing_utils.py +0 -0
  158. {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251010}/utils/copyright.py +0 -0
@@ -82,7 +82,7 @@ jobs:
82
82
 
83
83
  - name: set python
84
84
  id: set_python
85
- uses: actions/setup-python@v5
85
+ uses: actions/setup-python@v6
86
86
  with:
87
87
  python-version: ${{ inputs.python }}
88
88
 
@@ -49,6 +49,6 @@ jobs:
49
49
  push_to_pypi: ${{ (github.event.schedule == '30 0 * * *') || inputs.push_to_pypi || false }}
50
50
  test_configs: '[{"python":"3.11.4","label":"k8s-util","timeout":"40","code_coverage":true},
51
51
  {"python":"3.10.12","label":"k8s-util","timeout":"40"},
52
- {"python":"3.9.17","label":"k8s-h100-solo","timeout":"40"},
52
+ {"python":"3.13","label":"k8s-h100-solo","timeout":"40"},
53
53
  {"python":"3.12.6","label":"k8s-a100-duo","timeout":"40"}]'
54
54
  secrets: inherit
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251009
3
+ Version: 0.12.3a20251010
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -20,6 +20,3 @@ TRANSFORM_CONFIG_NAME = "transform_config"
20
20
  # required fields
21
21
  COMPRESSION_VERSION_NAME = "version"
22
22
  QUANTIZATION_METHOD_NAME = "quant_method"
23
-
24
- # auxillary configs
25
- KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
@@ -199,7 +199,7 @@ def initialize_qparams(
199
199
  expected_shape = (1,)
200
200
 
201
201
  elif strategy == QuantizationStrategy.TOKEN:
202
- expected_shape = (1, 1)
202
+ raise ValueError("Cannot perform static token quantization")
203
203
 
204
204
  elif strategy == QuantizationStrategy.CHANNEL:
205
205
  if len(observed_shape) < 2:
@@ -264,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
264
264
  actorder = model.actorder
265
265
  dynamic = model.dynamic
266
266
  observer = model.observer
267
+ dynamic = model.dynamic
267
268
 
268
269
  # infer strategy
269
270
  if strategy is None:
@@ -279,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
279
280
  "strategy='group' and group_size = -1 for 'channel'"
280
281
  )
281
282
 
283
+ # validate token strategy
284
+ if strategy == QuantizationStrategy.TOKEN and not dynamic:
285
+ raise ValueError(
286
+ "Cannot perform static token quantization, please use `dynamic=True`"
287
+ )
288
+
282
289
  # validate group strategy
283
290
  if strategy == QuantizationStrategy.GROUP:
284
291
  if group_size is None or group_size <= 0:
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251009'
20
+ __version__ = version = '0.12.3.a20251010'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251009
3
+ Version: 0.12.3a20251010
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -29,27 +29,6 @@ def _get_dim(dim: int, value: torch.Tensor):
29
29
  return reduce_dims
30
30
 
31
31
 
32
- @pytest.fixture
33
- def mock_per_token_calibration():
34
- def update_scale_zp(module: torch.nn.Module, base_name: str, value: torch.Tensor):
35
- quantization_scheme = getattr(module, "quantization_scheme", None)
36
- if not quantization_scheme:
37
- # no quantization scheme nothing to do
38
- return
39
-
40
- arg_name = "weights" if base_name == "weight" else f"{base_name}_activations"
41
- args = getattr(quantization_scheme, arg_name, None)
42
-
43
- dim = _get_dim({0, 1}, value)
44
- min_val = torch.amin(value, dim=dim, keepdims=True)
45
- max_val = torch.amax(value, dim=dim, keepdims=True)
46
- scale, zp = calculate_qparams(min_val, max_val, args)
47
- update_parameter_data(module, scale, f"{base_name}_scale")
48
- update_parameter_data(module, zp, f"{base_name}_zero_point")
49
-
50
- return update_scale_zp
51
-
52
-
53
32
  @pytest.fixture
54
33
  def mock_per_group_calibration():
55
34
  def update_scale_zp(
@@ -176,10 +176,6 @@ def test_initialize_module_for_quantization_offloaded(
176
176
  QuantizationArgs(strategy="block", block_structure=[2, 4]),
177
177
  None,
178
178
  ),
179
- (
180
- QuantizationArgs(strategy="token"),
181
- QuantizationArgs(strategy="token"),
182
- ),
183
179
  ],
184
180
  )
185
181
  def test_initialize_quantization_parameters(weights, input_activations):
@@ -238,9 +234,6 @@ def test_initialize_quantization_parameters(weights, input_activations):
238
234
  # For activations or when block_structure is None
239
235
  expected_shape = (1,)
240
236
 
241
- elif args.strategy == QuantizationStrategy.TOKEN:
242
- expected_shape = (1, 1)
243
-
244
237
  if not args.dynamic:
245
238
  assert getattr(layer, f"{q_param_name}_scale").shape == expected_shape
246
239
  assert getattr(layer, f"{q_param_name}_zero_point").shape == expected_shape
@@ -105,34 +105,3 @@ def test_group(
105
105
  model_shape[1],
106
106
  int(model_shape[0] / group_size),
107
107
  )
108
-
109
-
110
- @torch.no_grad
111
- @pytest.mark.parametrize("input_symmetry", [True, False])
112
- @pytest.mark.parametrize("weight_symmetry", [True, False])
113
- @pytest.mark.parametrize("input_shape", [(32, 256), (300, 200), (400, 400)])
114
- def test_token(
115
- mock_per_channel_calibration,
116
- mock_per_token_calibration,
117
- input_symmetry,
118
- weight_symmetry,
119
- input_shape,
120
- ):
121
- model = Linear(input_shape[1], 256)
122
- quant_config = create_config(
123
- input_symmetry,
124
- weight_symmetry,
125
- w_strategy=QuantizationStrategy.CHANNEL,
126
- i_strategy=QuantizationStrategy.TOKEN,
127
- )
128
- apply_quantization_config(model, quant_config)
129
-
130
- inputs = torch.randn(input_shape)
131
- mock_per_channel_calibration(model, base_name="weight", value=model.weight)
132
- mock_per_token_calibration(model, base_name="input", value=inputs)
133
-
134
- assert model.input_scale.shape == (1, 1)
135
- assert model.input_zero_point.shape == (1, 1)
136
-
137
- assert model.weight_scale.shape == (256, 1)
138
- assert model.weight_zero_point.shape == (256, 1)
@@ -50,7 +50,6 @@ from compressed_tensors.quantization.utils import (
50
50
  ]
51
51
  ),
52
52
  ),
53
- (True, "token", torch.Size([1, 1])),
54
53
  ],
55
54
  )
56
55
  def test_calculate_qparams(keepdims, strategy, exp_shape):