compressed-tensors 0.9.5a20250530__tar.gz → 0.9.5a20250602__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.9.5a20250530/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250602}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/forward.py +35 -24
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/quant_args.py +1 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/utils/helpers.py +1 -2
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +3 -3
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_int_quant.py +2 -2
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_forward.py +12 -12
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/.gitkeep +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.gitignore +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/Makefile +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/README.md +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/pyproject.toml +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/setup.cfg +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/setup.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_registry.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.9.5a20250530
|
3
|
+
Version: 0.9.5a20250602
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -227,31 +227,42 @@ def _process_quantization(
|
|
227
227
|
perm = torch.argsort(g_idx)
|
228
228
|
x = safe_permute(x, perm, dim=1)
|
229
229
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
if do_quantize:
|
239
|
-
output[:, start:end] = _quantize(
|
240
|
-
x=x[:, start:end],
|
241
|
-
scale=sc,
|
242
|
-
zero_point=zp,
|
243
|
-
q_min=q_min,
|
244
|
-
q_max=q_max,
|
245
|
-
args=args,
|
246
|
-
dtype=dtype,
|
247
|
-
global_scale=global_scale,
|
248
|
-
)
|
230
|
+
x = torch.reshape(
|
231
|
+
x,
|
232
|
+
(
|
233
|
+
x.shape[0],
|
234
|
+
ceil(x.shape[1] / group_size),
|
235
|
+
group_size,
|
236
|
+
),
|
237
|
+
)
|
249
238
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
)
|
239
|
+
if do_quantize:
|
240
|
+
output = _quantize(
|
241
|
+
x=x,
|
242
|
+
scale=scale.unsqueeze(-1),
|
243
|
+
zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
|
244
|
+
dtype=dtype,
|
245
|
+
global_scale=global_scale,
|
246
|
+
q_min=q_min,
|
247
|
+
q_max=q_max,
|
248
|
+
args=args,
|
249
|
+
)
|
250
|
+
|
251
|
+
if do_dequantize:
|
252
|
+
input = output if do_quantize else x
|
253
|
+
output = _dequantize(
|
254
|
+
x_q=input,
|
255
|
+
scale=scale.unsqueeze(-1),
|
256
|
+
zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
|
257
|
+
global_scale=global_scale,
|
258
|
+
)
|
259
|
+
|
260
|
+
output = torch.reshape(
|
261
|
+
output,
|
262
|
+
(output.shape[0], output.shape[1] * output.shape[2]),
|
263
|
+
)
|
264
|
+
|
265
|
+
output = output.to(output_dtype)
|
255
266
|
|
256
267
|
if not is_column_order:
|
257
268
|
output = safe_permute(output, torch.argsort(perm), dim=1)
|
@@ -81,7 +81,7 @@ def calculate_qparams(
|
|
81
81
|
currently only applied/supported for Fp4
|
82
82
|
|
83
83
|
:return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
|
84
|
-
scale
|
84
|
+
scale is of dtype FP8
|
85
85
|
"""
|
86
86
|
# based on the implementations for consuming quantized values,
|
87
87
|
# 0.0 must always be representable within the quantized range
|
@@ -490,7 +490,6 @@ def generate_global_scale(
|
|
490
490
|
attempts to use the entire FP8 dtype range while mapping a per-group max
|
491
491
|
to the FP4 max.
|
492
492
|
"""
|
493
|
-
scale_dtype = scale_data.dtype
|
494
493
|
tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
|
495
494
|
global_scale = scale_data.max * quant_data.max / tensor_amax
|
496
495
|
return global_scale.to(dtype)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.9.5a20250530
|
3
|
+
Version: 0.9.5a20250602
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -61,8 +61,8 @@ def make_dummy_g_idx(columns: int, group_size: int) -> torch.Tensor:
|
|
61
61
|
[
|
62
62
|
QuantizationStrategy.GROUP,
|
63
63
|
128,
|
64
|
-
torch.rand((512, 8
|
65
|
-
torch.zeros((512, 8
|
64
|
+
torch.rand((512, 8)) * 0.01,
|
65
|
+
torch.zeros((512, 8), dtype=torch.int8),
|
66
66
|
],
|
67
67
|
[
|
68
68
|
QuantizationStrategy.CHANNEL,
|
@@ -79,7 +79,7 @@ def test_quant_format(strategy, group_size, sc, zp):
|
|
79
79
|
"dummy.weight_zero_point": torch.tensor(zp, dtype=torch.float32),
|
80
80
|
}
|
81
81
|
if group_size is not None:
|
82
|
-
dense_state_dict["dummy.weight_g_idx"] = make_dummy_g_idx(
|
82
|
+
dense_state_dict["dummy.weight_g_idx"] = make_dummy_g_idx(1024, group_size)
|
83
83
|
|
84
84
|
quant_config = get_dummy_quant_config(strategy=strategy, group_size=group_size)
|
85
85
|
|
@@ -53,8 +53,8 @@ def get_dummy_quant_config(strategy, group_size=None, symmetric=True):
|
|
53
53
|
QuantizationStrategy.GROUP,
|
54
54
|
True,
|
55
55
|
128,
|
56
|
-
torch.rand((512, 8
|
57
|
-
torch.zeros((512, 8
|
56
|
+
torch.rand((512, 8)) * 0.01,
|
57
|
+
torch.zeros((512, 8), dtype=torch.int8),
|
58
58
|
],
|
59
59
|
[
|
60
60
|
QuantizationStrategy.CHANNEL,
|
@@ -108,8 +108,8 @@ def test_forward_quantize(
|
|
108
108
|
"int",
|
109
109
|
QuantizationStrategy.GROUP,
|
110
110
|
128,
|
111
|
-
torch.rand((512, 8
|
112
|
-
torch.zeros((512, 8
|
111
|
+
torch.rand((512, 8)) * 0.01,
|
112
|
+
torch.zeros((512, 8)),
|
113
113
|
None,
|
114
114
|
),
|
115
115
|
(
|
@@ -117,8 +117,8 @@ def test_forward_quantize(
|
|
117
117
|
"int",
|
118
118
|
QuantizationStrategy.GROUP,
|
119
119
|
128,
|
120
|
-
torch.rand((512, 8
|
121
|
-
torch.zeros((512, 8
|
120
|
+
torch.rand((512, 8)) * 0.01,
|
121
|
+
torch.zeros((512, 8)),
|
122
122
|
make_dummy_g_idx(1024, 128),
|
123
123
|
),
|
124
124
|
(
|
@@ -135,8 +135,8 @@ def test_forward_quantize(
|
|
135
135
|
"float",
|
136
136
|
QuantizationStrategy.GROUP,
|
137
137
|
128,
|
138
|
-
torch.rand((512, 8
|
139
|
-
torch.zeros((512, 8
|
138
|
+
torch.rand((512, 8)) * 0.01,
|
139
|
+
torch.zeros((512, 8)),
|
140
140
|
None,
|
141
141
|
),
|
142
142
|
(
|
@@ -144,8 +144,8 @@ def test_forward_quantize(
|
|
144
144
|
"float",
|
145
145
|
QuantizationStrategy.GROUP,
|
146
146
|
128,
|
147
|
-
torch.rand((512, 8
|
148
|
-
torch.zeros((512, 8
|
147
|
+
torch.rand((512, 8)) * 0.01,
|
148
|
+
torch.zeros((512, 8)),
|
149
149
|
make_dummy_g_idx(1024, 128),
|
150
150
|
),
|
151
151
|
],
|
@@ -174,8 +174,8 @@ def test_quantize(num_bits, type, strategy, group_size, scale, zero_point, g_idx
|
|
174
174
|
"int",
|
175
175
|
QuantizationStrategy.GROUP,
|
176
176
|
128,
|
177
|
-
torch.rand((512, 8
|
178
|
-
torch.zeros((512, 8
|
177
|
+
torch.rand((512, 8)) * 0.01,
|
178
|
+
torch.zeros((512, 8)),
|
179
179
|
None,
|
180
180
|
),
|
181
181
|
(
|
@@ -183,8 +183,8 @@ def test_quantize(num_bits, type, strategy, group_size, scale, zero_point, g_idx
|
|
183
183
|
"int",
|
184
184
|
QuantizationStrategy.GROUP,
|
185
185
|
128,
|
186
|
-
torch.rand((512, 8
|
187
|
-
torch.zeros((512, 8
|
186
|
+
torch.rand((512, 8)) * 0.01,
|
187
|
+
torch.zeros((512, 8)),
|
188
188
|
make_dummy_g_idx(1024, 128),
|
189
189
|
),
|
190
190
|
],
|
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/scripts/step-status
RENAMED
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/build.yml
RENAMED
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/report.yml
RENAMED
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/test.yml
RENAMED
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/.github/workflows/upload.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_registry.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/test_utils/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{compressed_tensors-0.9.5a20250530 → compressed_tensors-0.9.5a20250602}/tests/testing_utils.py
RENAMED
File without changes
|
File without changes
|