compressed-tensors 0.9.5a20250528__tar.gz → 0.9.5a20250602__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/build-test.yml +1 -1
  2. {compressed_tensors-0.9.5a20250528/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250602}/PKG-INFO +1 -1
  3. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/forward.py +35 -24
  4. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/quant_args.py +1 -0
  5. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/utils/helpers.py +1 -2
  6. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/version.py +1 -1
  7. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  8. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +3 -3
  9. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_int_quant.py +2 -2
  10. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_forward.py +12 -12
  11. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/.gitkeep +0 -0
  12. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/actions/test/action.yml +0 -0
  13. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/scripts/step-status +0 -0
  14. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/build.yml +0 -0
  15. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/report.yml +0 -0
  16. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/test-check.yaml +0 -0
  17. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/test.yml +0 -0
  18. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/trigger-all.yml +0 -0
  19. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.github/workflows/upload.yml +0 -0
  20. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/.gitignore +0 -0
  21. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/LICENSE +0 -0
  22. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/Makefile +0 -0
  23. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/README.md +0 -0
  24. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  25. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/bit_packing/int4_config.json +0 -0
  26. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/bitmask_compression.ipynb +0 -0
  27. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  28. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  29. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/example_quant_config.json +0 -0
  30. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  31. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/examples/quantize_and_pack_int4.ipynb +0 -0
  32. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/pyproject.toml +0 -0
  33. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/setup.cfg +0 -0
  34. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/setup.py +0 -0
  35. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/__init__.py +0 -0
  36. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/README.md +0 -0
  37. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/__init__.py +0 -0
  38. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/base.py +0 -0
  39. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/__init__.py +0 -0
  40. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/base.py +0 -0
  41. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/helpers.py +0 -0
  42. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  43. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  44. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  46. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  47. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  48. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  49. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  51. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  52. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  53. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  54. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  55. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  56. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/__init__.py +0 -0
  57. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/base.py +0 -0
  58. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/dense.py +0 -0
  59. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  60. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  61. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/linear/__init__.py +0 -0
  62. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  63. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/__init__.py +0 -0
  64. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  65. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  66. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  67. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  68. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  69. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/quant_config.py +0 -0
  70. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  71. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  72. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/registry/__init__.py +0 -0
  73. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/registry/registry.py +0 -0
  74. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/__init__.py +0 -0
  75. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/transform_args.py +0 -0
  76. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/transform_config.py +0 -0
  77. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  78. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/__init__.py +0 -0
  79. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/helpers.py +0 -0
  80. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/offload.py +0 -0
  81. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/permutations_24.py +0 -0
  82. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/permute.py +0 -0
  83. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  84. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  85. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  86. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  87. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/requires.txt +0 -0
  88. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  89. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/__init__.py +0 -0
  90. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/conftest.py +0 -0
  91. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/__init__.py +0 -0
  92. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/model_compressors/__init__.py +0 -0
  93. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  94. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  95. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  96. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  97. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  98. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  99. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  100. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  101. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  102. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_configs/__init__.py +0 -0
  103. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_configs/test_base.py +0 -0
  104. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  105. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_linear/__init__.py +0 -0
  106. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_linear/test_compressed_linear.py +0 -0
  107. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/__init__.py +0 -0
  108. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/__init__.py +0 -0
  109. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/conftest.py +0 -0
  110. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  111. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  112. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  113. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  114. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  115. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  116. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_configs/__init__.py +0 -0
  117. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  118. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  119. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_quant_args.py +0 -0
  120. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_quant_config.py +0 -0
  121. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_quant_scheme.py +0 -0
  122. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  123. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_registry.py +0 -0
  124. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_transform/test_transform_args.py +0 -0
  125. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_transform/test_transform_config.py +0 -0
  126. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_transform/test_transform_scheme.py +0 -0
  127. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_utils/__init__.py +0 -0
  128. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_utils/test_helpers.py +0 -0
  129. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_utils/test_offload.py +0 -0
  130. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/test_utils/test_safetensors_load.py +0 -0
  131. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/tests/testing_utils.py +0 -0
  132. {compressed_tensors-0.9.5a20250528 → compressed_tensors-0.9.5a20250602}/utils/copyright.py +0 -0

.github/workflows/build-test.yml
@@ -59,7 +59,7 @@ jobs:
     needs: [TEST]
     uses: ./.github/workflows/upload.yml
     with:
-      label: k8s-util
+      label: gcp-k8s-util
       timeout: 40
       run_id: ${{ github.run_id }}
       push_to_pypi: ${{ inputs.push_to_pypi }}

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250528
+Version: 0.9.5a20250602
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

src/compressed_tensors/quantization/lifecycle/forward.py
@@ -227,31 +227,42 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        # TODO: experiment with vectorizing for loop for performance
-        end = 0
-        for index, group_count in enumerate(group_sizes):
-            sc = scale[:, index].view(-1, 1)
-            zp = zero_point[:, index].view(-1, 1) if zero_point is not None else None
-
-            start = end
-            end = start + group_count
-            if do_quantize:
-                output[:, start:end] = _quantize(
-                    x=x[:, start:end],
-                    scale=sc,
-                    zero_point=zp,
-                    q_min=q_min,
-                    q_max=q_max,
-                    args=args,
-                    dtype=dtype,
-                    global_scale=global_scale,
-                )
+        x = torch.reshape(
+            x,
+            (
+                x.shape[0],
+                ceil(x.shape[1] / group_size),
+                group_size,
+            ),
+        )
 
-            if do_dequantize:
-                input = output[:, start:end] if do_quantize else x[:, start:end]
-                output[:, start:end] = _dequantize(
-                    x_q=input, scale=sc, zero_point=zp, global_scale=global_scale
-                )
+        if do_quantize:
+            output = _quantize(
+                x=x,
+                scale=scale.unsqueeze(-1),
+                zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
+                dtype=dtype,
+                global_scale=global_scale,
+                q_min=q_min,
+                q_max=q_max,
+                args=args,
+            )
+
+        if do_dequantize:
+            input = output if do_quantize else x
+            output = _dequantize(
+                x_q=input,
+                scale=scale.unsqueeze(-1),
+                zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
+                global_scale=global_scale,
+            )
+
+        output = torch.reshape(
+            output,
+            (output.shape[0], output.shape[1] * output.shape[2]),
+        )
+
+        output = output.to(output_dtype)
 
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
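
The refactor above replaces the per-group Python loop with one reshape plus broadcasting: the columns are folded into (rows, num_groups, group_size) and the 2-D scale and zero point are unsqueezed so they broadcast over each group. The snippet below is a hypothetical, self-contained sketch of that pattern (the helper name fake_quantize_grouped and the int4 range are assumptions for illustration, not the library's API), assuming the column count divides evenly by the group size.

import torch

def fake_quantize_grouped(x, scale, group_size, q_min=-8, q_max=7):
    # vectorized group quant/dequant: no Python loop over groups
    rows, cols = x.shape
    num_groups = cols // group_size               # assumes cols % group_size == 0
    x = x.reshape(rows, num_groups, group_size)   # (rows, num_groups, group_size)
    scale = scale.unsqueeze(-1)                   # (rows, num_groups, 1) broadcasts over each group
    q = torch.clamp(torch.round(x / scale), q_min, q_max)
    return (q * scale).reshape(rows, cols)        # flatten groups back to columns

w = torch.randn(512, 1024)
s = w.reshape(512, 8, 128).abs().amax(dim=-1) / 7   # per-group scales, shape (512, 8)
w_dq = fake_quantize_grouped(w, s, group_size=128)

Because the forward path now unsqueezes the scale itself, the scale must arrive as a 2-D (rows, num_groups) tensor, which is consistent with the test fixtures below switching from shape (512, 8, 1) to (512, 8).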

src/compressed_tensors/quantization/quant_args.py
@@ -53,6 +53,7 @@ class FP4_E2M1_DATA(FloatArgs):
     min = -6.0
 
     @staticmethod
+    @torch.compile
     def cast_to_fp4(x):
         sign = torch.sign(x)
         x = torch.abs(x)
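
Here @torch.compile is stacked under @staticmethod, so the compiler wraps the elementwise FP4 rounding and staticmethod wraps the compiled callable (decorators apply bottom-up). Below is a minimal, hypothetical reproduction of that decorator pattern; the E2M1 value table and nearest-value rounding are assumptions for the sketch, not the library's exact cast_to_fp4 implementation.

import torch

class FP4Caster:
    # representable FP4 E2M1 magnitudes (assumption for this sketch)
    LEVELS = torch.tensor([0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0])

    @staticmethod
    @torch.compile  # applied first, so staticmethod receives the compiled function
    def cast_to_fp4(x):
        sign = torch.sign(x)
        mag = torch.abs(x)
        # snap each magnitude to the nearest representable level
        idx = torch.argmin((mag.unsqueeze(-1) - FP4Caster.LEVELS).abs(), dim=-1)
        return sign * FP4Caster.LEVELS[idx]

print(FP4Caster.cast_to_fp4(torch.tensor([0.3, -2.4, 5.0])))  # tensor([ 0.5, -2.0,  4.0])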

src/compressed_tensors/quantization/utils/helpers.py
@@ -81,7 +81,7 @@ def calculate_qparams(
         currently only applied/supported for Fp4
 
     :return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
-        scale if of dtype FP8
+        scale is of dtype FP8
     """
     # based on the implementations for consuming quantized values,
     # 0.0 must always be representable within the quantized range

@@ -490,7 +490,6 @@ def generate_global_scale(
     attempts to use the entire FP8 dtype range while mapping a per-group max
     to the FP4 max.
     """
-    scale_dtype = scale_data.dtype
     tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
     global_scale = scale_data.max * quant_data.max / tensor_amax
     return global_scale.to(dtype)
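
The removed scale_dtype assignment was unused; the formula that remains spreads the FP8 scale range over the FP4 range. A hedged numeric sketch of that formula, assuming an FP8 E4M3 max of 448.0 and an FP4 E2M1 max of 6.0 for scale_data.max and quant_data.max respectively:

import torch

FP8_E4M3_MAX = 448.0  # assumed max of the scale dtype
FP4_E2M1_MAX = 6.0    # assumed max of the quantized dtype

weight = torch.randn(512, 1024) * 0.02
tensor_amax = weight.abs().max().to(torch.float32)
global_scale = FP8_E4M3_MAX * FP4_E2M1_MAX / tensor_amax

# by construction tensor_amax * global_scale == 448.0 * 6.0, so a group whose max
# equals the tensor max can use the top of the FP8 scale range without overflowing FP4
print(global_scale)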

src/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.9.5.a20250528'
+__version__ = version = '0.9.5.a20250602'
 __version_tuple__ = version_tuple = (0, 9, 5)

src/compressed_tensors.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250528
+Version: 0.9.5a20250602
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

tests/test_compressors/quantized_compressors/test_fp8_quant.py
@@ -61,8 +61,8 @@ def make_dummy_g_idx(columns: int, group_size: int) -> torch.Tensor:
         [
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1), dtype=torch.int8),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8), dtype=torch.int8),
         ],
         [
             QuantizationStrategy.CHANNEL,
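
This fixture change follows from the vectorized forward path: the dummy weight has 1024 columns, so group_size 128 gives 1024 / 128 = 8 groups and the scale must arrive as a 2-D (512, 8) tensor that the forward code unsqueezes itself. The same change repeats in the remaining test hunks, and the next hunk builds the dummy g_idx for 1024 columns so it matches the weight width. A small shape check, hypothetical but mirroring the fixtures:

import torch

rows, cols, group_size = 512, 1024, 128
num_groups = cols // group_size                  # 8
scale = torch.rand((rows, num_groups)) * 0.01    # 2-D, as in the updated fixtures
weight = torch.randn(rows, cols)

grouped = weight.reshape(rows, num_groups, group_size)
assert (grouped / scale.unsqueeze(-1)).shape == (rows, num_groups, group_size)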

@@ -79,7 +79,7 @@ def test_quant_format(strategy, group_size, sc, zp):
         "dummy.weight_zero_point": torch.tensor(zp, dtype=torch.float32),
     }
     if group_size is not None:
-        dense_state_dict["dummy.weight_g_idx"] = make_dummy_g_idx(512, group_size)
+        dense_state_dict["dummy.weight_g_idx"] = make_dummy_g_idx(1024, group_size)
 
     quant_config = get_dummy_quant_config(strategy=strategy, group_size=group_size)

tests/test_compressors/quantized_compressors/test_int_quant.py
@@ -53,8 +53,8 @@ def get_dummy_quant_config(strategy, group_size=None, symmetric=True):
             QuantizationStrategy.GROUP,
             True,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1), dtype=torch.int8),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8), dtype=torch.int8),
         ],
         [
             QuantizationStrategy.CHANNEL,

tests/test_quantization/lifecycle/test_forward.py
@@ -108,8 +108,8 @@ def test_forward_quantize(
             "int",
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1)),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8)),
             None,
         ),
         (

@@ -117,8 +117,8 @@ def test_forward_quantize(
             "int",
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1)),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8)),
             make_dummy_g_idx(1024, 128),
         ),
         (

@@ -135,8 +135,8 @@ def test_forward_quantize(
             "float",
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1)),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8)),
             None,
         ),
         (

@@ -144,8 +144,8 @@ def test_forward_quantize(
             "float",
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1)),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8)),
             make_dummy_g_idx(1024, 128),
         ),
     ],

@@ -174,8 +174,8 @@ def test_quantize(num_bits, type, strategy, group_size, scale, zero_point, g_idx
             "int",
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1)),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8)),
             None,
         ),
         (

@@ -183,8 +183,8 @@ def test_quantize(num_bits, type, strategy, group_size, scale, zero_point, g_idx
             "int",
             QuantizationStrategy.GROUP,
             128,
-            torch.rand((512, 8, 1)) * 0.01,
-            torch.zeros((512, 8, 1)),
+            torch.rand((512, 8)) * 0.01,
+            torch.zeros((512, 8)),
             make_dummy_g_idx(1024, 128),
         ),
     ],