compressed-tensors 0.9.4a20250408__tar.gz → 0.9.4a20250410__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/upload.yml +12 -4
  2. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/Makefile +2 -2
  3. {compressed_tensors-0.9.4a20250408/src/compressed_tensors.egg-info → compressed_tensors-0.9.4a20250410}/PKG-INFO +1 -1
  4. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/quant_scheme.py +13 -0
  5. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/utils/helpers.py +7 -1
  6. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/version.py +1 -1
  7. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  8. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/.gitkeep +0 -0
  9. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/actions/test/action.yml +0 -0
  10. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/scripts/step-status +0 -0
  11. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/build-test.yml +0 -0
  12. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/build.yml +0 -0
  13. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/report.yml +0 -0
  14. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/test-check.yaml +0 -0
  15. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/test.yml +0 -0
  16. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.github/workflows/trigger-all.yml +0 -0
  17. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/.gitignore +0 -0
  18. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/LICENSE +0 -0
  19. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/README.md +0 -0
  20. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  21. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/bit_packing/int4_config.json +0 -0
  22. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/bitmask_compression.ipynb +0 -0
  23. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  24. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  25. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/llama_1.1b/example_quant_config.json +0 -0
  26. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  27. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/examples/quantize_and_pack_int4.ipynb +0 -0
  28. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/pyproject.toml +0 -0
  29. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/setup.cfg +0 -0
  30. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/setup.py +0 -0
  31. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/__init__.py +0 -0
  32. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/README.md +0 -0
  33. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/__init__.py +0 -0
  34. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/base.py +0 -0
  35. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/__init__.py +0 -0
  36. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/base.py +0 -0
  37. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/helpers.py +0 -0
  38. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  39. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  40. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  41. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  42. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  43. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  44. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  46. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  47. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  48. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  49. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  51. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/config/__init__.py +0 -0
  52. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/config/base.py +0 -0
  53. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/config/dense.py +0 -0
  54. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  55. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  56. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/linear/__init__.py +0 -0
  57. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  58. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/__init__.py +0 -0
  59. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  60. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  61. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  62. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  63. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  64. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  65. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/quant_args.py +0 -0
  66. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/quant_config.py +0 -0
  67. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  68. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/registry/__init__.py +0 -0
  69. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/registry/registry.py +0 -0
  70. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/__init__.py +0 -0
  71. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/helpers.py +0 -0
  72. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/offload.py +0 -0
  73. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/permutations_24.py +0 -0
  74. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/permute.py +0 -0
  75. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  76. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  77. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  78. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  79. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors.egg-info/requires.txt +0 -0
  80. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  81. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/__init__.py +0 -0
  82. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/conftest.py +0 -0
  83. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/__init__.py +0 -0
  84. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/model_compressors/__init__.py +0 -0
  85. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  86. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  87. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  88. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  89. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  90. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  91. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  92. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  93. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  94. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  95. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_configs/__init__.py +0 -0
  96. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_configs/test_base.py +0 -0
  97. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  98. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_linear/__init__.py +0 -0
  99. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_linear/test_compressed_linear.py +0 -0
  100. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/__init__.py +0 -0
  101. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/__init__.py +0 -0
  102. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/conftest.py +0 -0
  103. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  104. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  105. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  106. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  107. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  108. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  109. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  110. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_configs/__init__.py +0 -0
  111. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  112. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  113. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_quant_args.py +0 -0
  114. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_quant_config.py +0 -0
  115. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_quant_scheme.py +0 -0
  116. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  117. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_registry.py +0 -0
  118. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_utils/__init__.py +0 -0
  119. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_utils/test_helpers.py +0 -0
  120. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_utils/test_offload.py +0 -0
  121. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/test_utils/test_safetensors_load.py +0 -0
  122. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/tests/testing_utils.py +0 -0
  123. {compressed_tensors-0.9.4a20250408 → compressed_tensors-0.9.4a20250410}/utils/copyright.py +0 -0
@@ -97,14 +97,21 @@ jobs:
97
97
  run_id: ${{ inputs.run_id }}
98
98
  asset_identifier: 'compressed*.whl'
99
99
 
100
+ - name: check if whl is new
101
+ id: check-whl
102
+ uses: neuralmagic/nm-actions/actions/check-whl-on-pypi@scm
103
+ with:
104
+ whl: ${{ steps.find-asset-whl.outputs.asset }}
105
+
100
106
  - name: upload whl to "nm-pypi"
101
- if: ${{ inputs.push_to_pypi }}
107
+ if: ${{ inputs.push_to_pypi && steps.check-whl.outputs.status }}
102
108
  uses: neuralmagic/nm-actions/actions/gcp-upload-asset@v1.1.0
103
109
  with:
104
110
  bucket_target: ${{ secrets.GCP_NM_PYPI_DIST }}
105
111
  asset: ${{ steps.find-asset-whl.outputs.asset }}
106
112
 
107
113
  - name: find tar.gz
114
+ if: ${{ steps.check-whl.outputs.status }}
108
115
  id: find-asset-targz
109
116
  uses: neuralmagic/nm-actions/actions/find-asset@v1.1.0
110
117
  with:
@@ -112,13 +119,14 @@ jobs:
112
119
  asset_identifier: 'compressed*.tar.gz'
113
120
 
114
121
  - name: upload tar.gz to "nm-pypi"
115
- if: ${{ inputs.push_to_pypi }}
122
+ if: ${{ inputs.push_to_pypi && steps.check-whl.outputs.status }}
116
123
  uses: neuralmagic/nm-actions/actions/gcp-upload-asset@v1.1.0
117
124
  with:
118
125
  bucket_target: ${{ secrets.GCP_NM_PYPI_DIST }}
119
126
  asset: ${{ steps.find-asset-targz.outputs.asset }}
120
127
 
121
128
  - name: update "nm-pypi" index
129
+ if: ${{ steps.check-whl.outputs.status }}
122
130
  uses: actions/github-script@v6
123
131
  with:
124
132
  github-token: ${{ secrets.NM_PYPI_WORKFLOW }}
@@ -133,7 +141,7 @@ jobs:
133
141
 
134
142
  # publish the wheel file to public pypi
135
143
  - name: push wheel to pypi.org
136
- if: ${{ inputs.push_to_pypi }}
144
+ if: ${{ inputs.push_to_pypi && steps.check-whl.outputs.status }}
137
145
  uses: neuralmagic/nm-actions/actions/publish-whl@v1.0.0
138
146
  with:
139
147
  username: ${{ secrets.PYPI_PUBLIC_USER }}
@@ -142,7 +150,7 @@ jobs:
142
150
 
143
151
  # publish the tar.gz file to public pypi
144
152
  - name: push wheel to pypi.org
145
- if: ${{ inputs.push_to_pypi }}
153
+ if: ${{ inputs.push_to_pypi && steps.check-whl.outputs.status }}
146
154
  uses: neuralmagic/nm-actions/actions/publish-whl@v1.0.0
147
155
  with:
148
156
  username: ${{ secrets.PYPI_PUBLIC_USER }}
@@ -1,6 +1,6 @@
1
1
  .PHONY: build docs test
2
2
 
3
- BUILD_ARGS := dev # set nightly to build nightly release
3
+ BUILD_TYPE ?= dev # set nightly to build nightly release
4
4
  PYCHECKDIRS := src tests
5
5
  PYCHECKGLOBS := 'src/**/*.py' 'tests/**/*.py' 'utils/**/*.py' 'examples/**/*.py' setup.py
6
6
  # run checks on all files for the repo
@@ -28,7 +28,7 @@ test:
28
28
  # creates wheel file
29
29
  build:
30
30
  @echo "Building the wheel for the repository";
31
- BUILD_TYPE=$(BUILD_ARGS) python3 setup.py sdist bdist_wheel;
31
+ BUILD_TYPE=$(BUILD_TYPE) python3 setup.py sdist bdist_wheel;
32
32
 
33
33
  # clean package
34
34
  clean:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.4a20250408
3
+ Version: 0.9.4a20250410
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -142,6 +142,18 @@ W4A16 = dict(
142
142
  ),
143
143
  )
144
144
 
145
+ # 4 bit integer weights only asymmetric quantization
146
+ W4A16_ASYM = dict(
147
+ weights=QuantizationArgs(
148
+ num_bits=4,
149
+ type=QuantizationType.INT,
150
+ strategy=QuantizationStrategy.GROUP,
151
+ group_size=128,
152
+ symmetric=False,
153
+ dynamic=False,
154
+ ),
155
+ )
156
+
145
157
  # 4 bit integer weights and 8 bit activations quantization
146
158
  INT8_W4A8 = dict(
147
159
  weights=QuantizationArgs(
@@ -205,6 +217,7 @@ PRESET_SCHEMES = {
205
217
  # Integer weight only schemes
206
218
  "W8A16": W8A16,
207
219
  "W4A16": W4A16,
220
+ "W4A16_ASYM": W4A16_ASYM,
208
221
  # Integer weight and activation schemes
209
222
  "W8A8": INT8_W8A8,
210
223
  "INT8": INT8_W8A8, # alias for W8A8
@@ -64,8 +64,11 @@ def calculate_qparams(
64
64
  :param quantization_args: settings to quantization
65
65
  :return: tuple of the calculated scale(s) and zero point(s)
66
66
  """
67
+ # based on the implementations for consuming quantized values,
68
+ # 0.0 must always be representable within the quantized range
67
69
  min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
68
70
  max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
71
+
69
72
  device = min_vals.device
70
73
 
71
74
  bit_min, bit_max = calculate_range(quantization_args, device)
@@ -84,6 +87,9 @@ def calculate_qparams(
84
87
  zero_points = torch.clamp(zero_points, bit_min, bit_max)
85
88
 
86
89
  # match zero-points to quantized type
90
+ # if casting to int, use round instead of truncate
91
+ if quantization_args.type == QuantizationType.INT:
92
+ zero_points = torch.round(zero_points)
87
93
  zero_points = zero_points.to(zp_dtype)
88
94
 
89
95
  if scales.ndim == 0:
@@ -96,7 +102,7 @@ def calculate_qparams(
96
102
  def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
97
103
  """
98
104
  Returns the computed scales and zero points for dynamic activation
99
- qunatization.
105
+ quantization.
100
106
 
101
107
  :param value: tensor to calculate quantization parameters for
102
108
  :param args: quantization args
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.9.4.a20250408'
20
+ __version__ = version = '0.9.4.a20250410'
21
21
  __version_tuple__ = version_tuple = (0, 9, 4)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.4a20250408
3
+ Version: 0.9.4a20250410
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.