compressed-tensors 0.9.5a20250602.tar.gz → 0.9.5a20250603.tar.gz

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (132)
  1. {compressed_tensors-0.9.5a20250602/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250603}/PKG-INFO +1 -1
  2. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/lifecycle/apply.py +1 -10
  3. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/lifecycle/initialize.py +7 -113
  4. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/utils/helpers.py +8 -5
  5. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/offload.py +134 -1
  6. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/version.py +1 -1
  7. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  8. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_utils/test_helpers.py +3 -5
  9. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_utils/test_offload.py +95 -5
  10. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/.gitkeep +0 -0
  11. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/actions/test/action.yml +0 -0
  12. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/scripts/step-status +0 -0
  13. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/build-test.yml +0 -0
  14. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/build.yml +0 -0
  15. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/report.yml +0 -0
  16. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/test-check.yaml +0 -0
  17. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/test.yml +0 -0
  18. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/trigger-all.yml +0 -0
  19. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.github/workflows/upload.yml +0 -0
  20. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/.gitignore +0 -0
  21. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/LICENSE +0 -0
  22. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/Makefile +0 -0
  23. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/README.md +0 -0
  24. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  25. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/bit_packing/int4_config.json +0 -0
  26. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/bitmask_compression.ipynb +0 -0
  27. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  28. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  29. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/llama_1.1b/example_quant_config.json +0 -0
  30. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  31. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/examples/quantize_and_pack_int4.ipynb +0 -0
  32. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/pyproject.toml +0 -0
  33. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/setup.cfg +0 -0
  34. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/setup.py +0 -0
  35. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/__init__.py +0 -0
  36. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/README.md +0 -0
  37. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/__init__.py +0 -0
  38. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/base.py +0 -0
  39. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/__init__.py +0 -0
  40. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/base.py +0 -0
  41. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/helpers.py +0 -0
  42. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  43. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  44. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  46. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  47. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  48. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  49. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  51. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  52. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  53. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  54. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  55. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  56. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/config/__init__.py +0 -0
  57. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/config/base.py +0 -0
  58. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/config/dense.py +0 -0
  59. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  60. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  61. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/linear/__init__.py +0 -0
  62. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  63. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/__init__.py +0 -0
  64. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  65. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  66. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  67. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  68. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/quant_args.py +0 -0
  69. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/quant_config.py +0 -0
  70. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  71. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  72. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/registry/__init__.py +0 -0
  73. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/registry/registry.py +0 -0
  74. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/transform/__init__.py +0 -0
  75. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/transform/transform_args.py +0 -0
  76. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/transform/transform_config.py +0 -0
  77. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  78. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/__init__.py +0 -0
  79. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/helpers.py +0 -0
  80. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/permutations_24.py +0 -0
  81. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/permute.py +0 -0
  82. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  83. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  84. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  85. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  86. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors.egg-info/requires.txt +0 -0
  87. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  88. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/__init__.py +0 -0
  89. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/conftest.py +0 -0
  90. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/__init__.py +0 -0
  91. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/model_compressors/__init__.py +0 -0
  92. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  93. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  94. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  95. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  96. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  97. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  98. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  99. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  100. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  101. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  102. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  103. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_configs/__init__.py +0 -0
  104. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_configs/test_base.py +0 -0
  105. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  106. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_linear/__init__.py +0 -0
  107. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_linear/test_compressed_linear.py +0 -0
  108. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/__init__.py +0 -0
  109. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/__init__.py +0 -0
  110. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/conftest.py +0 -0
  111. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  112. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  113. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  114. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  115. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  116. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  117. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  118. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_configs/__init__.py +0 -0
  119. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  120. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  121. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_quant_args.py +0 -0
  122. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_quant_config.py +0 -0
  123. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_quantization/test_quant_scheme.py +0 -0
  124. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_registry.py +0 -0
  125. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_transform/test_transform_args.py +0 -0
  126. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_transform/test_transform_config.py +0 -0
  127. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_transform/test_transform_scheme.py +0 -0
  128. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_utils/__init__.py +0 -0
  129. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_utils/test_helpers.py +0 -0
  130. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/test_utils/test_safetensors_load.py +0 -0
  131. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/tests/testing_utils.py +0 -0
  132. {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250603}/utils/copyright.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.9.5a20250602
+ Version: 0.9.5a20250603
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
src/compressed_tensors/quantization/lifecycle/apply.py
@@ -27,14 +27,8 @@ from compressed_tensors.quantization.lifecycle.compressed import (
  )
  from compressed_tensors.quantization.lifecycle.initialize import (
      initialize_module_for_quantization,
-     update_fused_layer_weight_global_scales,
- )
- from compressed_tensors.quantization.quant_args import (
-     FP4_E2M1_DATA,
-     FP8_E4M3_DATA,
-     QuantizationArgs,
-     QuantizationType,
  )
+ from compressed_tensors.quantization.quant_args import QuantizationArgs
  from compressed_tensors.quantization.quant_config import (
      QuantizationConfig,
      QuantizationStatus,
@@ -272,9 +266,6 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
          )
      )

-     if status == QuantizationStatus.INITIALIZED:
-         update_fused_layer_weight_global_scales(model)
-
      if current_status < status >= QuantizationStatus.COMPRESSED > current_status:
          model.apply(compress_quantized_weights)

src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -23,26 +23,18 @@ from compressed_tensors.quantization.lifecycle.forward import (
      wrap_module_forward_quantized,
  )
  from compressed_tensors.quantization.quant_args import (
-     FP4_E2M1_DATA,
      FP8_E4M3_DATA,
      ActivationOrdering,
      QuantizationArgs,
      QuantizationStrategy,
-     QuantizationType,
  )
  from compressed_tensors.quantization.quant_config import QuantizationStatus
  from compressed_tensors.quantization.quant_scheme import QuantizationScheme
- from compressed_tensors.quantization.utils import (
-     generate_global_scale,
-     is_fp4,
-     is_kv_cache_quant_scheme,
-     iter_named_quantizable_modules,
- )
+ from compressed_tensors.quantization.utils import is_fp4, is_kv_cache_quant_scheme
  from compressed_tensors.utils import (
      disable_hf_hook,
      get_execution_device,
      register_offload_parameter,
-     update_parameter_data,
  )
  from torch.nn import Module, Parameter

@@ -51,7 +43,6 @@ __all__ = [
      "initialize_module_for_quantization",
      "is_attention_module",
      "KVCacheScaleType",
-     "update_fused_layer_weight_global_scales",
  ]


@@ -162,22 +153,13 @@ def _initialize_scale_zero_point(
      # initialize on execution device to avoid performing quantized ops on cpu
      device = get_execution_device(module)

-     # 1. Create global_scales for tensor_group
+     # 1. Create global_scales for tensor_group - generates
+     # a per tensor scale
      if quantization_args.strategy == QuantizationStrategy.TENSOR_GROUP:
-         # TODO: should move to llmcompressor
-         if base_name == "weight":
-             # When applying weight-only FP4 quantization, generate a global_scale
-             # This scale is applied during runtime to ensure that the generated
-             # local scale falls properly within the FP8 range (i.e max value is FP8_max)
-             # which is the expected dtype of NVFP4A16 scales
-             value = generate_global_scale(input_tensor=module.weight)
-             value = value.to(device)
-             init_global_scale = Parameter(value, requires_grad=False)
-         else:
-             init_global_scale = Parameter(
-                 torch.empty(1, dtype=torch.float32, device=device),
-                 requires_grad=False,
-             )
+         init_global_scale = Parameter(
+             torch.empty(1, dtype=torch.float32, device=device),
+             requires_grad=False,
+         )
          register_offload_parameter(
              module, f"{base_name}_global_scale", init_global_scale
          )
@@ -258,91 +240,3 @@ def _initialize_attn_scales(module: Module) -> None:
          requires_grad=False,
      )
      register_offload_parameter(module, KVCacheScaleType.VALUE.value, init_scale)
-
-
- # TODO: Potentially introduce an argument to turn this off
- # Only relevant for NVFP4A16 currently
- def update_fused_layer_weight_global_scales(model: torch.nn.Module):
-     """
-     When running NVFP4A16 quantization, update the global scale
-     such that q,k,v layers are treated as one tensor with the same
-     global_scale and gate_proj/up_proj layers are treated as one tensor
-     with the same global scale. This is requirement currently being set
-     by vLLM and may be removed in the future OR potentially make it
-     an optional step.
-
-     :param model: model to quantize
-     """
-
-     def _is_attention_module(module: Module):
-         return "attention" in module.__class__.__name__.lower() and (
-             hasattr(module, "k_proj")
-             or hasattr(module, "v_proj")
-             or hasattr(module, "qkv_proj")
-         )
-
-     def _is_mlp_module(module: Module):
-         return "mlp" in module.__class__.__name__.lower() and (
-             hasattr(module, "gate_proj") or hasattr(module, "up_proj")
-         )
-
-     def _valid_fp4_quant(layer_list: List[torch.nn.Linear]):
-         """
-         Return True if all the linear layers in the layer_list are
-         NVFP4A16 quantized.
-         """
-         for layer in layer_list:
-             scheme = getattr(layer, "quantization_scheme", None)
-             if scheme is None:
-                 return False
-
-             weight_quant_args = scheme.weights
-
-             if weight_quant_args is None:
-                 return False
-
-             if not is_fp4(quantization_args=weight_quant_args):
-                 return False
-         return True
-
-     for name, submodule in iter_named_quantizable_modules(
-         model,
-         include_attn=True,
-         include_mlp=True,
-     ):
-
-         if _is_attention_module(submodule):
-             # already fused/treated as one layer
-             if hasattr(submodule, "qkv_proj"):
-                 continue
-
-             if not _valid_fp4_quant(
-                 [submodule.q_proj, submodule.v_proj, submodule.k_proj]
-             ):
-                 continue
-
-             q_weight = submodule.q_proj.weight.data
-             v_weight = submodule.v_proj.weight.data
-             k_weight = submodule.k_proj.weight.data
-
-             value = generate_global_scale(
-                 input_tensor=torch.cat((q_weight, v_weight, k_weight), dim=0)
-             )
-
-             update_parameter_data(submodule.q_proj, value, "weight_global_scale")
-             update_parameter_data(submodule.k_proj, value, "weight_global_scale")
-             update_parameter_data(submodule.v_proj, value, "weight_global_scale")
-
-         if _is_mlp_module(submodule):
-             if not _valid_fp4_quant([submodule.gate_proj, submodule.up_proj]):
-                 continue
-
-             gate_data = submodule.gate_proj.weight.data
-             up_data = submodule.up_proj.weight.data
-
-             value = generate_global_scale(
-                 input_tensor=torch.cat((gate_data, up_data), dim=0)
-             )
-
-             update_parameter_data(submodule.gate_proj, value, "weight_global_scale")
-             update_parameter_data(submodule.up_proj, value, "weight_global_scale")
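
Net effect of the initialize.py changes: for TENSOR_GROUP (NVFP4-style) schemes, `weight_global_scale` is now always created as an empty placeholder rather than computed from the weights at initialization time, and the fused q/k/v and gate/up rescaling pass (`update_fused_layer_weight_global_scales`) is removed from this package. A minimal sketch of the new behavior, assuming an NVFP4A16-style weight scheme (the argument values below are assumptions mirroring that preset, not taken from this diff):

    import torch
    from compressed_tensors.quantization import (
        QuantizationArgs,
        QuantizationScheme,
        initialize_module_for_quantization,
    )

    # assumed NVFP4-style weight args: 4-bit float, tensor_group strategy
    args = QuantizationArgs(
        num_bits=4, type="float", strategy="tensor_group", group_size=16
    )
    scheme = QuantizationScheme(targets=["Linear"], weights=args)

    layer = torch.nn.Linear(16, 16)
    initialize_module_for_quantization(layer, scheme)

    # the global scale is now just an empty fp32 placeholder; presumably an
    # observer fills it in later (the removed path computed it from the
    # weights right here)
    print(layer.weight_global_scale.shape)  # torch.Size([1])
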
src/compressed_tensors/quantization/utils/helpers.py
@@ -47,7 +47,7 @@ __all__ = [
      "compute_dynamic_scales_and_zp",
      "calculate_range",
      "calculate_qparams",
-     "generate_global_scale",
+     "generate_gparam",
      "is_fp4",
  ]

@@ -475,8 +475,9 @@ def parse_out_kv_cache_args(
      return kv_cache_args, quant_scheme_to_layers


- def generate_global_scale(
-     input_tensor: torch.Tensor,
+ def generate_gparam(
+     updated_min_val: torch.Tensor,
+     updated_max_val: torch.Tensor,
      scale_data: Optional[FloatArgs] = FP8_E4M3_DATA,
      quant_data: Optional[FloatArgs] = FP4_E2M1_DATA,
      dtype: Optional[torch.dtype] = torch.float32,
@@ -490,6 +491,8 @@ def generate_global_scale(
      attempts to use the entire FP8 dtype range while mapping a per-group max
      to the FP4 max.
      """
-     tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
-     global_scale = scale_data.max * quant_data.max / tensor_amax
+     min_vals = torch.min(updated_min_val, torch.zeros_like(updated_min_val))
+     max_vals = torch.max(updated_max_val, torch.zeros_like(updated_max_val))
+     max_val_pos = torch.max(torch.abs(min_vals), torch.abs(max_vals))
+     global_scale = scale_data.max * quant_data.max / max_val_pos
      return global_scale.to(dtype)
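
Migration note for the rename above: `generate_global_scale(input_tensor)` becomes `generate_gparam(updated_min_val, updated_max_val)`, taking observed min/max statistics instead of the raw tensor. A short sketch mirroring the updated test further below, where `torch.aminmax` stands in for whatever observer produces the statistics:

    import torch
    from compressed_tensors.quantization.utils import generate_gparam

    weight = torch.nn.Linear(7, 8).weight

    # old API: generate_global_scale(weight)
    # new API: pass min/max statistics of the tensor
    min_val, max_val = torch.aminmax(weight)
    global_scale = generate_gparam(min_val.data, max_val.data)

    # as before, the abs-max of the tensor maps to
    # FP8_E4M3_DATA.max * FP4_E2M1_DATA.max / global_scale
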
src/compressed_tensors/utils/offload.py
@@ -28,15 +28,18 @@ Utilities associated with offloading functionality provided by `accelerate`.
  import contextlib
  import warnings
  from functools import wraps
- from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
+ from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Union

  import torch


  try:
+     from accelerate import dispatch_model
      from accelerate.hooks import (
          AlignDevicesHook,
          add_hook_to_module,
+         attach_align_device_hook,
+         named_module_tensors,
          remove_hook_from_module,
      )
      from accelerate.utils import (
@@ -54,6 +57,9 @@ except ImportError:
      OffloadedWeightsLoader = None
      PrefixedDataset = None
      set_module_tensor_to_device = None
+     named_module_tensors = None
+     dispatch_model = None
+     attach_align_device_hook = None


  __all__ = [
@@ -70,6 +76,9 @@ __all__ = [
      "disable_offload",
      "align_modules",
      "align_module_device",
+     "register_offload_module",
+     "delete_offload_module",
+     "force_cpu_offload",
  ]


@@ -77,6 +86,11 @@ def check_accelerate(fallback: Any):
      def decorator(func: Callable[[Any], Any]):
          if not _has_accelerate:

+             if fallback == "error":
+                 raise ValueError(
+                     "Please install `accelerate` in order to use this function"
+                 )
+
              @wraps(func)
              def fallback_fn(*args, **kwargs):
                  return fallback
@@ -346,6 +360,7 @@ def delete_from_weights_map(
      )


+ @check_accelerate(fallback=contextlib.nullcontext())
  @contextlib.contextmanager
  def disable_offload(module: torch.nn.Module):
      """
@@ -362,6 +377,7 @@ def disable_offload(module: torch.nn.Module):
      yield


+ @check_accelerate(fallback=contextlib.nullcontext())
  @contextlib.contextmanager
  def align_modules(
      modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],
@@ -383,6 +399,123 @@ def align_modules(
      yield


+ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.Module):
+     """
+     Register a submodule with offloading if the parent module is offloaded
+
+     :param base: module to attach submodule to
+     :param name: name of submodule
+     :param module: submodule to attach
+     """
+
+     if has_offloaded_params(base):
+         hook: AlignDevicesHook = base._hf_hook
+         assert hook.offload
+         assert hook.weights_map is not None
+         assert hook.tied_params_map is not None
+
+         # offloading kwargs for submodule
+         place_submodules = False
+         offload_buffers = True
+
+         # copy device offloading arguments from parent
+         current_device = next(base.parameters()).device  # assume base has parameters
+         offload_device = get_offloaded_device(base)
+
+         # offload parameters to weights map
+         for param_name, param in named_module_tensors(
+             module, include_buffers=offload_buffers, recurse=place_submodules
+         ):
+             offloaded = param.to(offload_device)
+             hook.tied_params_map[offloaded.data_ptr()] = {}  # (1)
+             offload_to_weights_map(hook.weights_map, f"{name}.{param_name}", offloaded)
+
+             # if the parent places submodules, offload here
+             if hook.place_submodules:
+                 set_module_tensor_to_device(module, param_name, current_device)
+
+         # if the parent does not place submodules, then add a hook
+         # parameters are offloaded by `add_hook_to_module`
+         if not hook.place_submodules:
+             weights_map = PrefixedDataset(
+                 hook.weights_map.dataset, prefix=f"{hook.weights_map.prefix}{name}."
+             )
+
+             submodule_hook = AlignDevicesHook(
+                 execution_device=hook.execution_device,
+                 offload=hook.offload,
+                 io_same_device=False,
+                 weights_map=weights_map,
+                 offload_buffers=offload_buffers,
+                 place_submodules=place_submodules,
+                 skip_keys=None,
+                 tied_params_map=hook.tied_params_map,
+             )
+             add_hook_to_module(module, submodule_hook)
+
+     base.register_module(name, module)
+
+     # (1): Since we cannot know which pointers are shared when we add parameters in an
+     # online way, assume that all pointers are shared. This comes at no runtime cost
+
+
+ def delete_offload_module(base: torch.nn.Module, name: str):
+     """
+     Delete a submodule from a model which may contain offloading
+     :param base: parent module to delete submodule from
+     :param name: name of submodule on parent
+     """
+     module: torch.nn.Module = getattr(base, name)
+
+     for param_name, _ in list(module.named_parameters()):
+         delete_offload_parameter(module, param_name)
+
+     delattr(base, name)
+
+
+ @check_accelerate(fallback="error")
+ def force_cpu_offload(
+     module: torch.nn.Module, execution_device: torch.device
+ ) -> torch.nn.Module:
+     """
+     Force cpu offloading a module, primarily used for testing
+
+     :param module: module containing parameters to offload
+     :param execution_device: execution device submodules
+     :return: module with hooks to perform cpu offloading
+     """
+     # edge case: there is a bug in `dispatch_model` which causes
+     # the function to only work if the model contains submodules
+     if next(module.children(), None) is None:
+         attach_align_device_hook(
+             module,
+             execution_device=execution_device,
+             offload=True,
+             weights_map=module.state_dict(),
+             tied_params_map={},
+         )
+         return module
+
+     device_map = {}
+
+     def collect_device_map(name: List[str], module: torch.nn.Module):
+         if next(module.parameters(recurse=False), None) is not None:
+             device_map[".".join(name)] = "cpu"
+             return
+
+         else:
+             for submodule_name, submodule in module.named_children():
+                 name.append(submodule_name)
+                 collect_device_map(name, submodule)
+                 name.pop()
+
+     collect_device_map([], module)
+
+     return dispatch_model(
+         module, device_map, main_device=execution_device, force_hooks=True
+     )
+
+
  """ Upstreamed Functions """


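The three new helpers are exercised together in the tests added further below; here is a minimal usage sketch along the same lines (requires `accelerate`; per its docstring, `force_cpu_offload` is primarily intended for testing):

    import torch
    from compressed_tensors.utils import (
        delete_offload_module,
        force_cpu_offload,
        has_offloaded_params,
        register_offload_module,
    )

    model = torch.nn.Sequential(torch.nn.Linear(1, 2))  # stand-in model
    model = force_cpu_offload(model, torch.device("cpu"))

    # parameters now live in the hook's weights map; the module holds meta tensors
    assert has_offloaded_params(model[0])
    assert model[0].weight.device == torch.device("meta")

    # submodules added after dispatch are folded into the parent's offloading
    register_offload_module(model[0], "child", torch.nn.Linear(2, 3))
    model(torch.empty(1))  # weights are streamed in on each forward call

    # and can be detached again along with their offloaded parameters
    delete_offload_module(model[0], "child")
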
src/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.9.5.a20250602'
+ __version__ = version = '0.9.5.a20250603'
  __version_tuple__ = version_tuple = (0, 9, 5)
src/compressed_tensors.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.9.5a20250602
+ Version: 0.9.5a20250603
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
tests/test_quantization/test_utils/test_helpers.py
@@ -20,10 +20,7 @@ from compressed_tensors.quantization import (
      QuantizationArgs,
      QuantizationStrategy,
  )
- from compressed_tensors.quantization.utils import (
-     calculate_qparams,
-     generate_global_scale,
- )
+ from compressed_tensors.quantization.utils import calculate_qparams, generate_gparam


  @pytest.mark.parametrize(
@@ -70,7 +67,8 @@ def test_fused_global_scales():
      layer = torch.nn.Linear(7, 8)
      max_tensor_value = torch.abs(layer.weight.data).max()
      # use defaults
-     global_scale = generate_global_scale(layer.weight)
+     min_val, max_val = torch.aminmax(layer.weight)
+     global_scale = generate_gparam(min_val.data, max_val.data)
      # max value should be = (448 * 6) / global_scale
      assert max_tensor_value == pytest.approx(
          FP4_E2M1_DATA.max * FP8_E4M3_DATA.max / global_scale, abs=0.001
tests/test_utils/test_offload.py
@@ -16,10 +16,13 @@ import torch
  from compressed_tensors.utils import (
      align_module_device,
      align_modules,
+     delete_offload_module,
      delete_offload_parameter,
      disable_hf_hook,
+     force_cpu_offload,
      get_execution_device,
      has_offloaded_params,
+     register_offload_module,
      register_offload_parameter,
      update_offload_parameter,
  )
@@ -37,9 +40,17 @@ class ExampleModule(torch.nn.Module):
          return x * self.a + self.b


+ class ExampleModel(torch.nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.linear = torch.nn.Linear(1, 2)
+
+     def forward(self, x):
+         return self.linear(x)
+
+
  @requires_accelerate()
  def test_has_offloaded_params():
-     from accelerate.big_modeling import cpu_offload_with_hook
      from accelerate.hooks import attach_align_device_hook, remove_hook_from_module

      module = ExampleModule()
@@ -48,10 +59,6 @@ def test_has_offloaded_params():
      attach_align_device_hook(module, offload=False)
      assert not has_offloaded_params(module)

-     remove_hook_from_module(module)
-     module, _ = cpu_offload_with_hook(module)
-     assert not has_offloaded_params(module)
-
      remove_hook_from_module(module)
      attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
      assert has_offloaded_params(module)
@@ -334,3 +341,86 @@ def test_offload_to_weights_map():
      weights_map = PrefixedDataset(OffloadedWeightsLoader({name: old_value}), prefix)
      offload_to_weights_map(weights_map, name, new_value)
      assert weights_map[name] == new_value
+
+
+ @requires_gpu
+ @requires_accelerate()
+ @pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+ def test_register_offload_module(exec_device):
+     # no offloading
+     model = ExampleModel()
+     child = torch.nn.Linear(2, 3)
+     register_offload_module(model, "child", child)
+     register_offload_module(model.linear, "child", child)
+     assert child in model.children()
+     assert child in model.linear.children()
+
+     # with offloading
+     model = ExampleModel()
+     child = torch.nn.Linear(2, 3)
+     force_cpu_offload(model, exec_device)
+     register_offload_module(model, "child", child)
+     register_offload_module(model.linear, "child", child)
+     assert child in model.children()
+     assert child in model.linear.children()
+
+     # can run modules
+     model(torch.empty(1))
+     child(torch.empty(2, device=exec_device))
+
+
+ @requires_gpu
+ @requires_accelerate()
+ @pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+ def test_delete_offload_module(exec_device):
+     # no offloading
+     model = ExampleModel()
+     child = torch.nn.Linear(2, 3)
+     register_offload_module(model, "child", child)
+     register_offload_module(model.linear, "child", child)
+     delete_offload_module(model, "child")
+     delete_offload_module(model.linear, "child")
+     assert not child in model.children()
+     assert not child in model.linear.children()
+
+     # with offloading
+     model = ExampleModel()
+     child = torch.nn.Linear(2, 3)
+     force_cpu_offload(model, exec_device)
+     register_offload_module(model, "child", child)
+     register_offload_module(model.linear, "child", child)
+     delete_offload_module(model, "child")
+     delete_offload_module(model.linear, "child")
+     assert not child in model.children()
+     assert not child in model.linear.children()
+
+
+ @requires_gpu
+ @requires_accelerate()
+ @pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+ def test_force_cpu_offload(exec_device):
+     # single module
+     module = torch.nn.Linear(1, 2)
+     module = force_cpu_offload(module, exec_device)
+     assert has_offloaded_params(module)
+     assert module._hf_hook.offload
+     assert module.weight.device == torch.device("meta")
+     assert "weight" in module._hf_hook.weights_map
+     assert module._hf_hook.tied_params_map is not None
+
+     # can run
+     module(torch.empty(1, device=exec_device))
+
+     # model
+     model = ExampleModel()
+     model = force_cpu_offload(model, exec_device)
+     assert not has_offloaded_params(model)
+
+     assert has_offloaded_params(model.linear)
+     assert model.linear._hf_hook.offload
+     assert model.linear.weight.device == torch.device("meta")
+     assert "weight" in model.linear._hf_hook.weights_map
+     assert model.linear._hf_hook.tied_params_map is not None
+
+     # can run
+     model(torch.empty(1, device=exec_device))