compressed-tensors 0.12.3a20251009__tar.gz → 0.12.3a20251013__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/test.yml +1 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/trigger-all.yml +1 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/PKG-INFO +1 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/base.py +0 -3
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/apply.py +2 -28
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/initialize.py +1 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_args.py +7 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/conftest.py +0 -21
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_initialize.py +0 -7
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_configs/test_strategies.py +0 -31
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_utils/test_helpers.py +0 -1
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/.gitkeep +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/post-release-nightly-build.yml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/quality-check.yaml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/.gitignore +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/LICENSE +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/Makefile +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/README.md +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/pyproject.toml +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/setup.cfg +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/setup.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/format.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/logger.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/apply.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/base.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/transform/utils/matrix.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/match.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/mock_observer.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_configs/test_infer_quant.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_registry.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/factory/test_serialization.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_match.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_utils/test_type.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/utils/copyright.py +0 -0
|
@@ -49,6 +49,6 @@ jobs:
|
|
|
49
49
|
push_to_pypi: ${{ (github.event.schedule == '30 0 * * *') || inputs.push_to_pypi || false }}
|
|
50
50
|
test_configs: '[{"python":"3.11.4","label":"k8s-util","timeout":"40","code_coverage":true},
|
|
51
51
|
{"python":"3.10.12","label":"k8s-util","timeout":"40"},
|
|
52
|
-
{"python":"3.
|
|
52
|
+
{"python":"3.13","label":"k8s-h100-solo","timeout":"40"},
|
|
53
53
|
{"python":"3.12.6","label":"k8s-a100-duo","timeout":"40"}]'
|
|
54
54
|
secrets: inherit
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.12.3a20251009
|
|
3
|
+
Version: 0.12.3a20251013
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import logging
|
|
16
16
|
from collections import OrderedDict
|
|
17
17
|
from copy import deepcopy
|
|
18
|
-
from typing import Dict, Iterable, List, Optional
|
|
18
|
+
from typing import Dict, List, Optional
|
|
19
19
|
from typing import OrderedDict as OrderedDictType
|
|
20
20
|
from typing import Union
|
|
21
21
|
|
|
@@ -34,7 +34,7 @@ from compressed_tensors.quantization.utils import (
|
|
|
34
34
|
KV_CACHE_TARGETS,
|
|
35
35
|
is_kv_cache_quant_scheme,
|
|
36
36
|
)
|
|
37
|
-
from compressed_tensors.utils.helpers import deprecated, replace_module
|
|
37
|
+
from compressed_tensors.utils.helpers import replace_module
|
|
38
38
|
from compressed_tensors.utils.match import match_named_modules, match_targets
|
|
39
39
|
from compressed_tensors.utils.offload import update_parameter_data
|
|
40
40
|
from compressed_tensors.utils.safetensors_load import get_safetensors_folder
|
|
@@ -45,7 +45,6 @@ from torch.nn import Module
|
|
|
45
45
|
__all__ = [
|
|
46
46
|
"load_pretrained_quantization_parameters",
|
|
47
47
|
"apply_quantization_config",
|
|
48
|
-
"find_name_or_class_matches",
|
|
49
48
|
]
|
|
50
49
|
|
|
51
50
|
from compressed_tensors.quantization.utils.helpers import is_module_quantized
|
|
@@ -208,31 +207,6 @@ def process_kv_cache_config(
|
|
|
208
207
|
return config
|
|
209
208
|
|
|
210
209
|
|
|
211
|
-
@deprecated(
|
|
212
|
-
message="This function is deprecated and will be removed in a future release."
|
|
213
|
-
"Please use `match_targets` from `compressed_tensors.utils.match` instead."
|
|
214
|
-
)
|
|
215
|
-
def find_name_or_class_matches(
|
|
216
|
-
name: str, module: Module, targets: Iterable[str], check_contains: bool = False
|
|
217
|
-
) -> List[str]:
|
|
218
|
-
"""
|
|
219
|
-
Returns all targets that match the given name or the class name.
|
|
220
|
-
Returns empty list otherwise.
|
|
221
|
-
The order of the output `matches` list matters.
|
|
222
|
-
The entries are sorted in the following order:
|
|
223
|
-
1. matches on exact strings
|
|
224
|
-
2. matches on regex patterns
|
|
225
|
-
3. matches on module names
|
|
226
|
-
"""
|
|
227
|
-
if check_contains:
|
|
228
|
-
raise NotImplementedError(
|
|
229
|
-
"This function is deprecated, and the check_contains=True option has been"
|
|
230
|
-
" removed."
|
|
231
|
-
)
|
|
232
|
-
|
|
233
|
-
return match_targets(name, module, targets)
|
|
234
|
-
|
|
235
|
-
|
|
236
210
|
def _load_quant_args_from_mapping(
|
|
237
211
|
base_name: str, module_name: str, module: Module, mapping: Dict
|
|
238
212
|
):
|
|
@@ -199,7 +199,7 @@ def initialize_qparams(
|
|
|
199
199
|
expected_shape = (1,)
|
|
200
200
|
|
|
201
201
|
elif strategy == QuantizationStrategy.TOKEN:
|
|
202
|
-
|
|
202
|
+
raise ValueError("Cannot perform static token quantization")
|
|
203
203
|
|
|
204
204
|
elif strategy == QuantizationStrategy.CHANNEL:
|
|
205
205
|
if len(observed_shape) < 2:
|
|
@@ -264,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
|
264
264
|
actorder = model.actorder
|
|
265
265
|
dynamic = model.dynamic
|
|
266
266
|
observer = model.observer
|
|
267
|
+
dynamic = model.dynamic
|
|
267
268
|
|
|
268
269
|
# infer strategy
|
|
269
270
|
if strategy is None:
|
|
@@ -279,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
|
279
280
|
"strategy='group' and group_size = -1 for 'channel'"
|
|
280
281
|
)
|
|
281
282
|
|
|
283
|
+
# validate token strategy
|
|
284
|
+
if strategy == QuantizationStrategy.TOKEN and not dynamic:
|
|
285
|
+
raise ValueError(
|
|
286
|
+
"Cannot perform static token quantization, please use `dynamic=True`"
|
|
287
|
+
)
|
|
288
|
+
|
|
282
289
|
# validate group strategy
|
|
283
290
|
if strategy == QuantizationStrategy.GROUP:
|
|
284
291
|
if group_size is None or group_size <= 0:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.12.3a20251009
|
|
3
|
+
Version: 0.12.3a20251013
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
@@ -29,27 +29,6 @@ def _get_dim(dim: int, value: torch.Tensor):
|
|
|
29
29
|
return reduce_dims
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
@pytest.fixture
|
|
33
|
-
def mock_per_token_calibration():
|
|
34
|
-
def update_scale_zp(module: torch.nn.Module, base_name: str, value: torch.Tensor):
|
|
35
|
-
quantization_scheme = getattr(module, "quantization_scheme", None)
|
|
36
|
-
if not quantization_scheme:
|
|
37
|
-
# no quantization scheme nothing to do
|
|
38
|
-
return
|
|
39
|
-
|
|
40
|
-
arg_name = "weights" if base_name == "weight" else f"{base_name}_activations"
|
|
41
|
-
args = getattr(quantization_scheme, arg_name, None)
|
|
42
|
-
|
|
43
|
-
dim = _get_dim({0, 1}, value)
|
|
44
|
-
min_val = torch.amin(value, dim=dim, keepdims=True)
|
|
45
|
-
max_val = torch.amax(value, dim=dim, keepdims=True)
|
|
46
|
-
scale, zp = calculate_qparams(min_val, max_val, args)
|
|
47
|
-
update_parameter_data(module, scale, f"{base_name}_scale")
|
|
48
|
-
update_parameter_data(module, zp, f"{base_name}_zero_point")
|
|
49
|
-
|
|
50
|
-
return update_scale_zp
|
|
51
|
-
|
|
52
|
-
|
|
53
32
|
@pytest.fixture
|
|
54
33
|
def mock_per_group_calibration():
|
|
55
34
|
def update_scale_zp(
|
|
@@ -176,10 +176,6 @@ def test_initialize_module_for_quantization_offloaded(
|
|
|
176
176
|
QuantizationArgs(strategy="block", block_structure=[2, 4]),
|
|
177
177
|
None,
|
|
178
178
|
),
|
|
179
|
-
(
|
|
180
|
-
QuantizationArgs(strategy="token"),
|
|
181
|
-
QuantizationArgs(strategy="token"),
|
|
182
|
-
),
|
|
183
179
|
],
|
|
184
180
|
)
|
|
185
181
|
def test_initialize_quantization_parameters(weights, input_activations):
|
|
@@ -238,9 +234,6 @@ def test_initialize_quantization_parameters(weights, input_activations):
|
|
|
238
234
|
# For activations or when block_structure is None
|
|
239
235
|
expected_shape = (1,)
|
|
240
236
|
|
|
241
|
-
elif args.strategy == QuantizationStrategy.TOKEN:
|
|
242
|
-
expected_shape = (1, 1)
|
|
243
|
-
|
|
244
237
|
if not args.dynamic:
|
|
245
238
|
assert getattr(layer, f"{q_param_name}_scale").shape == expected_shape
|
|
246
239
|
assert getattr(layer, f"{q_param_name}_zero_point").shape == expected_shape
|
|
@@ -105,34 +105,3 @@ def test_group(
|
|
|
105
105
|
model_shape[1],
|
|
106
106
|
int(model_shape[0] / group_size),
|
|
107
107
|
)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
@torch.no_grad
|
|
111
|
-
@pytest.mark.parametrize("input_symmetry", [True, False])
|
|
112
|
-
@pytest.mark.parametrize("weight_symmetry", [True, False])
|
|
113
|
-
@pytest.mark.parametrize("input_shape", [(32, 256), (300, 200), (400, 400)])
|
|
114
|
-
def test_token(
|
|
115
|
-
mock_per_channel_calibration,
|
|
116
|
-
mock_per_token_calibration,
|
|
117
|
-
input_symmetry,
|
|
118
|
-
weight_symmetry,
|
|
119
|
-
input_shape,
|
|
120
|
-
):
|
|
121
|
-
model = Linear(input_shape[1], 256)
|
|
122
|
-
quant_config = create_config(
|
|
123
|
-
input_symmetry,
|
|
124
|
-
weight_symmetry,
|
|
125
|
-
w_strategy=QuantizationStrategy.CHANNEL,
|
|
126
|
-
i_strategy=QuantizationStrategy.TOKEN,
|
|
127
|
-
)
|
|
128
|
-
apply_quantization_config(model, quant_config)
|
|
129
|
-
|
|
130
|
-
inputs = torch.randn(input_shape)
|
|
131
|
-
mock_per_channel_calibration(model, base_name="weight", value=model.weight)
|
|
132
|
-
mock_per_token_calibration(model, base_name="input", value=inputs)
|
|
133
|
-
|
|
134
|
-
assert model.input_scale.shape == (1, 1)
|
|
135
|
-
assert model.input_zero_point.shape == (1, 1)
|
|
136
|
-
|
|
137
|
-
assert model.weight_scale.shape == (256, 1)
|
|
138
|
-
assert model.weight_zero_point.shape == (256, 1)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/mock_observer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/test_registry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/tests/testing_utils.py
RENAMED
|
File without changes
|
{compressed_tensors-0.12.3a20251009 → compressed_tensors-0.12.3a20251013}/utils/copyright.py
RENAMED
|
File without changes
|