compressed-tensors 0.12.3a20251013__py3-none-any.whl → 0.12.3a20251028__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/quantized_compressors/__init__.py +1 -1
- compressed_tensors/compressors/quantized_compressors/{nvfp4_quantized.py → fp4_quantized.py} +9 -0
- compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +4 -4
- compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +3 -3
- compressed_tensors/config/base.py +1 -0
- compressed_tensors/modeling/__init__.py +18 -0
- compressed_tensors/modeling/attention.py +147 -0
- compressed_tensors/modeling/kvcache.py +183 -0
- compressed_tensors/quantization/lifecycle/apply.py +48 -103
- compressed_tensors/quantization/lifecycle/initialize.py +83 -28
- compressed_tensors/quantization/quant_args.py +8 -7
- compressed_tensors/quantization/quant_config.py +59 -45
- compressed_tensors/quantization/quant_scheme.py +2 -0
- compressed_tensors/quantization/utils/__init__.py +1 -0
- compressed_tensors/quantization/utils/helpers.py +2 -33
- compressed_tensors/quantization/utils/mxfp4_utils.py +97 -0
- compressed_tensors/utils/helpers.py +63 -1
- compressed_tensors/utils/match.py +29 -0
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.12.3a20251013.dist-info → compressed_tensors-0.12.3a20251028.dist-info}/METADATA +5 -5
- {compressed_tensors-0.12.3a20251013.dist-info → compressed_tensors-0.12.3a20251028.dist-info}/RECORD +24 -20
- {compressed_tensors-0.12.3a20251013.dist-info → compressed_tensors-0.12.3a20251028.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.12.3a20251013.dist-info → compressed_tensors-0.12.3a20251028.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.12.3a20251013.dist-info → compressed_tensors-0.12.3a20251028.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/quantized_compressors/{nvfp4_quantized.py → fp4_quantized.py}
RENAMED
@@ -123,6 +123,15 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         return decompressed_weight
 
 
+@BaseCompressor.register(name=CompressionFormat.mxfp4_pack_quantized.value)
+class MXFP4PackedCompressor(NVFP4PackedCompressor):
+    """
+    Alias for mxfp4 quantized models
+    """
+
+    pass
+
+
 @torch.compile(fullgraph=True, dynamic=True)
 def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
     """
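A hedged sketch (not part of the diff): with the alias registered, the `mxfp4_pack_quantized` format name should resolve through the same compressor registry as the nvfp4 format. `load_from_registry` is the registry loader that compressed-tensors compressors inherit; the exact call and default construction are assumptions.

```python
from compressed_tensors.compressors import BaseCompressor
from compressed_tensors.config import CompressionFormat

# hypothetical lookup by format name; construction with no arguments is assumed
compressor = BaseCompressor.load_from_registry(
    CompressionFormat.mxfp4_pack_quantized.value
)
print(type(compressor).__name__)  # expected: MXFP4PackedCompressor
```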

compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
CHANGED
@@ -19,7 +19,7 @@ import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
 from compressed_tensors.config import CompressionFormat, SparsityStructure
-from compressed_tensors.quantization import
+from compressed_tensors.quantization import FP8_E4M3_DATA
 from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
 from torch import Tensor
 
@@ -189,11 +189,11 @@ def sparse24_bitmask_compress(
 
     bytemasks = get_24_bytemasks(tensor=tensor)
 
-    if tensor.dtype ==
+    if tensor.dtype == FP8_E4M3_DATA.dtype:
         # acces raw bytes of the tensor
         tensor_view = tensor.view(torch.int8)
         values = tensor_view[bytemasks]
-        values = values.view(
+        values = values.view(FP8_E4M3_DATA.dtype)
     else:
         values = tensor[bytemasks]
 
@@ -241,7 +241,7 @@ def get_24_bytemasks(tensor):
         multiple of 4.
     """
     original_dtype = tensor.dtype
-    if tensor.dtype ==
+    if tensor.dtype == FP8_E4M3_DATA.dtype:
         tensor = tensor.view(torch.int8)
     original_shape = tensor.shape
     num_elements = tensor.numel()

compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
CHANGED
@@ -18,7 +18,7 @@ import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import
+from compressed_tensors.quantization import FP8_E4M3_DATA
 from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
 from torch import Tensor
 
@@ -138,11 +138,11 @@ def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
     bytemasks = tensor != 0
     row_counts = bytemasks.sum(dim=-1)
     row_offsets = torch.cumsum(row_counts, 0) - row_counts
-    if tensor.dtype ==
+    if tensor.dtype == FP8_E4M3_DATA.dtype:
         # acces raw bytes of the tensor
         tensor_view = tensor.view(torch.int8)
         values = tensor_view[bytemasks]
-        values = values.view(
+        values = values.view(FP8_E4M3_DATA.dtype)
     else:
         values = tensor[bytemasks]
     bitmasks_packed = pack_bitmasks(bytemasks)
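Both bitmask compressors gained the same `FP8_E4M3_DATA.dtype` branch. A minimal sketch of the underlying trick (not part of the diff): boolean-mask indexing may not be supported for float8 tensors in a given PyTorch build, so the raw bytes are gathered through an int8 view and then reinterpreted back to float8.

```python
import torch

t = torch.randn(4, 4).to(torch.float8_e4m3fn)  # same dtype as FP8_E4M3_DATA.dtype
mask = torch.rand(4, 4) > 0.5                  # stand-in for the computed bytemask
# gather through the int8 view, then reinterpret the packed bytes as float8
values = t.view(torch.int8)[mask].view(torch.float8_e4m3fn)
assert values.dtype == torch.float8_e4m3fn
```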

compressed_tensors/modeling/__init__.py
ADDED
@@ -0,0 +1,18 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# flake8: noqa
+# isort: off
+from .kvcache import *
+from .attention import *

compressed_tensors/modeling/attention.py
ADDED
@@ -0,0 +1,147 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import Callable, Optional
+
+from compressed_tensors.modeling.kvcache import initialize_hooked_kv_cache
+from compressed_tensors.quantization.lifecycle.forward import forward_quantize
+from compressed_tensors.utils import getattr_chain
+from compressed_tensors.utils.internal import InternalModule
+from torch import Tensor
+from torch.nn import Module
+from torch.utils.hooks import RemovableHandle
+from transformers import PretrainedConfig, PreTrainedModel
+from transformers.masking_utils import ALL_MASK_ATTENTION_FUNCTIONS
+from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
+
+
+__all__ = [
+    "QuantizedAttentionImpl",
+    "initialize_hooked_attention",
+    "register_query_hook",
+    "IMPL_ATTR",
+]
+
+
+IMPL_ATTR = "impl"
+HOOKED_ATTENTION_NAME = "ct_hooked_attention"
+
+
+class QuantizedAttentionImpl(InternalModule):
+    """
+    QuantizedAttentionImpl module which wraps the functionality of the original
+    attention implementation. Unlike the original attention function, this
+    implementation is a `torch.nn.Module` which can be hooked to trigger
+    transforms and calibration hooks.
+
+    This module works by being registered as a submodule to attention modules via
+    `initialize_hooked_attention`, registering a new attention implementation function
+    which calls this module, then setting the model attention implementation to the new
+    function. After triggering hooks and quantization, this module calls the original
+    attention implementation function.
+    """
+
+    _original_impl = "eager"
+
+    def __init__(self, config: PretrainedConfig):
+        super().__init__()
+        self.config = config
+
+    def forward(
+        self,
+        module: Module,
+        query: Tensor,
+        key: Tensor,
+        value: Tensor,
+        *args,
+        **kwargs,
+    ):
+        # quantization
+        quant_args_attr = "quantization_scheme.input_activations"
+        quant_args = getattr_chain(module, quant_args_attr, None)
+        quant_enabled = getattr(module, "quantization_enabled", True)
+        if quant_args is not None and quant_enabled:
+            query = forward_quantize(module, query, "q", quant_args)
+
+        # original attention
+        return ALL_ATTENTION_FUNCTIONS[QuantizedAttentionImpl._original_impl](
+            module,
+            query,
+            key,
+            value,
+            *args,
+            **kwargs,
+        )
+
+
+# ----- initialize ----- #
+
+
+def _hooked_attention(module: Module, *args, **kwargs):
+    assert hasattr(module, IMPL_ATTR), (
+        f"Using {HOOKED_ATTENTION_NAME} attention implementation, "
+        f"but attention module does not have {IMPL_ATTR} submodule."
+    )
+
+    return getattr(module, IMPL_ATTR)(module, *args, **kwargs)
+
+
+def initialize_hooked_attention(model: PreTrainedModel, module: Module):
+    """
+    Initialize `QuantizedAttentionImpl` and `QuantizedKVCache` instances
+    attached to attention. Assumes that only one model is hooked at a time.
+
+    :param model: parent model of attention module
+    :param module: attention module to initialize with
+    """
+    if not hasattr(module, IMPL_ATTR):
+        module.register_module(IMPL_ATTR, QuantizedAttentionImpl(model.config))
+
+    if model.config._attn_implementation != HOOKED_ATTENTION_NAME:
+        QuantizedAttentionImpl._original_impl = model.config._attn_implementation
+        original_mask = ALL_MASK_ATTENTION_FUNCTIONS[model.config._attn_implementation]
+
+        ALL_ATTENTION_FUNCTIONS.register(HOOKED_ATTENTION_NAME, _hooked_attention)
+        ALL_MASK_ATTENTION_FUNCTIONS.register(HOOKED_ATTENTION_NAME, original_mask)
+        model.set_attn_implementation(HOOKED_ATTENTION_NAME)
+        assert model.config._attn_implementation == HOOKED_ATTENTION_NAME
+
+    initialize_hooked_kv_cache(model, module)
+
+
+# ----- hooks ----- #
+
+
+def register_query_hook(
+    module: Module, hook: Callable[[Module, Tensor], Optional[Tensor]]
+) -> RemovableHandle:
+    """
+    Register a hook which takes post-rope query states as an argument and
+    returns the modified query states or `None`
+
+    :param module: attention module to add hook to
+    :param hook: query hook function
+    """
+    impl: QuantizedAttentionImpl = getattr(module, IMPL_ATTR)
+
+    def _hook(impl: QuantizedAttentionImpl, args, kwargs):
+        bound = inspect.signature(impl.forward).bind(*args, **kwargs)
+        value = hook(module, bound.arguments["query"])
+        if value is not None:
+            bound.arguments["query"] = value
+
+        return bound.args, bound.kwargs
+
+    return impl.register_forward_pre_hook(_hook, with_kwargs=True)
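A hedged usage sketch (not part of the diff) of the hooks defined above, assuming a transformers causal-LM whose attention submodules are named `*.self_attn`; the module naming and model path are assumptions, while `initialize_hooked_attention` and `register_query_hook` come from this file.

```python
from transformers import AutoModelForCausalLM

from compressed_tensors.modeling import (
    initialize_hooked_attention,
    register_query_hook,
)

model = AutoModelForCausalLM.from_pretrained("path/to/model")  # placeholder path

def log_query(module, query):
    # observe post-rope query states; returning None leaves them unchanged
    print(type(module).__name__, tuple(query.shape))
    return None

for name, submodule in model.named_modules():
    if name.endswith("self_attn"):  # assumed attention module naming
        initialize_hooked_attention(model, submodule)
        register_query_hook(submodule, log_query)
```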

compressed_tensors/modeling/kvcache.py
ADDED
@@ -0,0 +1,183 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from weakref import ReferenceType, ref
+
+from compressed_tensors.quantization.lifecycle.forward import forward_quantize
+from compressed_tensors.utils import getattr_chain
+from compressed_tensors.utils.internal import InternalModule
+from torch import Tensor
+from torch.nn import Module
+from torch.utils.hooks import RemovableHandle
+from transformers import Cache, PretrainedConfig, PreTrainedModel
+
+
+__all__ = [
+    "QuantizedKVCache",
+    "initialize_hooked_kv_cache",
+    "register_key_hook",
+    "register_value_hook",
+    "KV_CACHE_ATTR",
+]
+
+
+KV_CACHE_ATTR = "kv_cache"
+
+
+class QuantizedKVCache(InternalModule):
+    """
+    QuantizedKVCache module which wraps the functionality of any existing kvcache args.
+    Unlike transform Cache instances, this cache is a `torch.nn.Module` which can be
+    hooked to trigger transforms and calibration hooks.
+
+    This module works by being registered as a submodule to attention modules via
+    `initialize_hooked_kv_cache`, then adding a hook which replaces `past_key_values`
+    kwargs with this module. This module adopts the functionality of the replaced cache,
+    preserving caching functionality such as sliding window attention, ect.
+
+    :param attn_module: parent attention module
+    """
+
+    def __init__(self, config: PretrainedConfig, attn_module: Module):
+        super().__init__()
+        self.config = config
+        self.attn_module = ref(attn_module)  # avoid circular reference
+        self.past_key_values: Optional[ReferenceType[Cache]] = None
+
+    def update(self, *args, **kwargs) -> Tuple[Tensor, Tensor]:
+        return self(*args, **kwargs)
+
+    def forward(
+        self,
+        key_states: Tensor,
+        value_states: Tensor,
+        *args,
+        **kwargs,
+    ) -> Tuple[Tensor, Tensor]:
+        # quantization
+        module = self.attn_module()
+        quant_args_attr = "quantization_scheme.input_activations"
+        quant_args = getattr_chain(module, quant_args_attr, None)
+        quant_enabled = getattr(module, "quantization_enabled", True)
+        if quant_args is not None and quant_enabled:
+            key_states = forward_quantize(module, key_states, "k", quant_args)
+            value_states = forward_quantize(module, value_states, "v", quant_args)
+
+        # original cache
+        if self.past_key_values is not None:
+            ret = self.past_key_values().update(
+                key_states, value_states, *args, **kwargs
+            )
+        else:
+            ret = (key_states, value_states)
+        self.past_key_values = None
+
+        return ret
+
+    def add_past_key_values(self, past_key_values: Optional[Cache]):
+        if past_key_values is not None:
+            self.past_key_values = ref(past_key_values)
+        else:
+            self.past_key_values = None
+
+
+# ----- initialize ----- #
+
+
+def _kv_cache_attention_hook(
+    module: Module, args: List[Any], kwargs: Dict[str, Any]
+) -> Tuple[List[Any], Dict[str, Any]]:
+    """
+    Hook which should be called before each quantized attention forward pass.
+    This hook dynamically replaces the `past_key_values` kwarg to the attention
+    forward function.
+
+    The original kvcache object is assigned to QuantizedKVCache().past_key_values
+    as a weakref to maintain original cache functionality and compute savings
+    """
+    _past_kv_name = (
+        "past_key_values"  # transformers#39956
+        if "past_key_values" in inspect.signature(module.forward).parameters
+        else "past_key_value"
+    )
+    past_key_values: Optional[Cache] = kwargs.get(_past_kv_name, None)
+
+    cache: QuantizedKVCache = getattr(module, KV_CACHE_ATTR)
+    cache.add_past_key_values(past_key_values)
+    kwargs[_past_kv_name] = cache
+
+    return args, kwargs
+
+
+def initialize_hooked_kv_cache(model: PreTrainedModel, module: Module):
+    """
+    Initialize a `QuantizedKVCache` instance attached to attention
+
+    :param model: parent model of attention module
+    :param module: attention module to initialize with
+    """
+    if not hasattr(module, KV_CACHE_ATTR):
+        module.register_module(KV_CACHE_ATTR, QuantizedKVCache(model.config, module))
+        module.register_forward_pre_hook(_kv_cache_attention_hook, with_kwargs=True)
+
+
+# ----- hooks ----- #
+
+
+def register_key_hook(
+    module: Module, hook: Callable[[Module, Tensor], Optional[Tensor]]
+) -> RemovableHandle:
+    """
+    Register a hook which takes post-rope key states as an argument and
+    returns the modified key states or `None`
+
+    :param module: attention module to add hook to
+    :param hook: key hook function
+    """
+    kv_cache: QuantizedKVCache = getattr(module, KV_CACHE_ATTR)
+
+    def _hook(cache: QuantizedKVCache, args, kwargs):
+        bound = inspect.signature(cache.forward).bind(*args, **kwargs)
+        value = hook(module, bound.arguments["key_states"])
+        if value is not None:
+            bound.arguments["key_states"] = value
+
+        return bound.args, bound.kwargs
+
+    return kv_cache.register_forward_pre_hook(_hook, with_kwargs=True)
+
+
+def register_value_hook(
+    module: Module, hook: Callable[[Module, Tensor], Optional[Tensor]]
+) -> RemovableHandle:
+    """
+    Register a hook which takes value states as an argument and
+    returns the modified value states or `None`
+
+    :param module: attention module to add hook to
+    :param hook: value hook function
+    """
+    kv_cache: QuantizedKVCache = getattr(module, KV_CACHE_ATTR)
+
+    def _hook(cache: QuantizedKVCache, args, kwargs):
+        bound = inspect.signature(cache.forward).bind(*args, **kwargs)
+        value = hook(module, bound.arguments["value_states"])
+        if value is not None:
+            bound.arguments["value_states"] = value
+
+        return bound.args, bound.kwargs
+
+    return kv_cache.register_forward_pre_hook(_hook, with_kwargs=True)
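A hedged continuation of the attention sketch above (not part of the diff): attaching key and value hooks to the same attention `submodule`. Note that `initialize_hooked_attention` already calls `initialize_hooked_kv_cache`, so the explicit call below is shown only for clarity.

```python
from compressed_tensors.modeling import (
    initialize_hooked_kv_cache,
    register_key_hook,
    register_value_hook,
)

# `model` and `submodule` continue from the attention sketch above
initialize_hooked_kv_cache(model, submodule)

def clip_keys(module, key_states):
    # returning a tensor replaces the key states seen by the cache
    return key_states.clamp(-10, 10)

def observe_values(module, value_states):
    # returning None keeps the value states unchanged
    print(tuple(value_states.shape))

key_handle = register_key_hook(submodule, clip_keys)
value_handle = register_value_hook(submodule, observe_values)
```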

compressed_tensors/quantization/lifecycle/apply.py
CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import logging
 from collections import OrderedDict
 from copy import deepcopy
 from typing import Dict, List, Optional
@@ -21,8 +20,13 @@ from typing import Union
 
 import torch
 from compressed_tensors.config import CompressionFormat
+from compressed_tensors.modeling import (
+    initialize_hooked_attention,
+    initialize_hooked_kv_cache,
+)
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
+    is_attention_module,
 )
 from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.quant_config import (
@@ -30,14 +34,15 @@ from compressed_tensors.quantization.quant_config import (
     QuantizationStatus,
 )
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
-from compressed_tensors.quantization.utils import (
-    KV_CACHE_TARGETS,
-    is_kv_cache_quant_scheme,
-)
 from compressed_tensors.utils.helpers import replace_module
-from compressed_tensors.utils.match import
+from compressed_tensors.utils.match import (
+    is_narrow_match,
+    match_named_modules,
+    match_targets,
+)
 from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
+from loguru import logger
 from safetensors import safe_open
 from torch.nn import Module
 
@@ -53,9 +58,6 @@ from compressed_tensors.utils.safetensors_load import (
 )
 
 
-_LOGGER = logging.getLogger(__name__)
-
-
 def load_pretrained_quantization_parameters(
     model: Module,
     model_name_or_path: Optional[str] = None,
@@ -125,8 +127,14 @@ def apply_quantization_config(
     if config is None:  # see PR #180
         return dict()
 
-    #
-
+    # force zero points during initialization
+    force_zero_point = config.quantization_status != QuantizationStatus.COMPRESSED
+
+    # apply and initialize kv cache quantization
+    if config.kv_cache_scheme is not None:
+        _apply_kv_cache_scheme(
+            model, config.kv_cache_scheme, config.quantization_status
+        )
 
     # build mapping of targets to schemes for easier matching
     # use ordered dict to preserve target ordering in config
@@ -162,49 +170,40 @@
             replace_module(model, name, compressed_linear)
 
         else:
+            if is_attention_module(submodule) and is_narrow_match(
+                model, scheme.targets, name
+            ):
+                initialize_hooked_attention(model, submodule)
+
             initialize_module_for_quantization(
                 submodule,
-                force_zero_point=
-                != QuantizationStatus.COMPRESSED,
+                force_zero_point=force_zero_point,
             )
 
         submodule.quantization_status = config.quantization_status
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-    config: QuantizationConfig, targets: Union[List[str], str] = KV_CACHE_TARGETS
-) -> QuantizationConfig:
-    """
-    Reformulate the `config.kv_cache` as a `config_group`
-    and add it to the set of existing `config.groups`
-
-    :param config: the QuantizationConfig
-    :return: the QuantizationConfig with additional "kv_cache" group
-    """
-    if targets == KV_CACHE_TARGETS:
-        _LOGGER.info(f"KV cache targets set to default value of: {KV_CACHE_TARGETS}")
-
-    kv_cache_dict = config.kv_cache_scheme.model_dump()
-    kv_cache_scheme = QuantizationScheme(
-        output_activations=QuantizationArgs(**kv_cache_dict),
-        targets=targets,
+def _apply_kv_cache_scheme(
+    model: torch.nn.Module,
+    kv_cache_scheme: QuantizationArgs,
+    status: QuantizationStatus,
+):
+    if not kv_cache_scheme.symmetric:
+        raise logger.warning("vLLM does not support asymmetric kv cache quantization")
+
+    # applies and initializes kv cache quantization
+    # this step cannot come after attention apply/initialize
+    # otherwise it will override the attention qparams
+    scheme = QuantizationScheme(
+        targets=[".*self_attn$"],  # is never read in practice
+        input_activations=kv_cache_scheme,
     )
-
-
-
+    for submodule in model.modules():
+        if is_attention_module(submodule):
+            submodule.quantization_scheme = scheme
+            initialize_hooked_kv_cache(model, submodule)
+            initialize_module_for_quantization(submodule, force_zero_point=False)
+            submodule.quantization_status = status
 
 
 def _load_quant_args_from_mapping(
@@ -256,60 +255,6 @@ def _scheme_from_targets(
     targets: List[str],
     name: str,
 ) -> QuantizationScheme:
-
-
-
-        return target_to_scheme[targets[0]]
-
-    # otherwise, we need to merge QuantizationSchemes corresponding
-    # to multiple targets. This is most likely because `name` module
-    # is being target both as an ordinary quantization target, as well
-    # as kv cache quantization target
-    schemes_to_merge = [target_to_scheme[target] for target in targets]
-    return _merge_schemes(schemes_to_merge, name)
-
-
-def _merge_schemes(
-    schemes_to_merge: List[QuantizationScheme], name: str
-) -> QuantizationScheme:
-    kv_cache_quantization_scheme = [
-        scheme for scheme in schemes_to_merge if is_kv_cache_quant_scheme(scheme)
-    ]
-    if not kv_cache_quantization_scheme:
-        # if the schemes_to_merge do not contain any
-        # kv cache QuantizationScheme
-        # return the first scheme (the prioritized one,
-        # since the order of schemes_to_merge matters)
-        return schemes_to_merge[0]
-    else:
-        # fetch the kv cache QuantizationScheme and the highest
-        # priority non-kv cache QuantizationScheme and merge them
-        kv_cache_quantization_scheme = kv_cache_quantization_scheme[0]
-        quantization_scheme = [
-            scheme
-            for scheme in schemes_to_merge
-            if not is_kv_cache_quant_scheme(scheme)
-        ][0]
-        schemes_to_merge = [kv_cache_quantization_scheme, quantization_scheme]
-    merged_scheme = {}
-    for scheme in schemes_to_merge:
-        scheme_dict = {
-            k: v for k, v in scheme.model_dump().items() if v is not None
-        }
-        # when merging multiple schemes, the final target will be
-        # the `name` argument - hence erase the original targets
-        del scheme_dict["targets"]
-        # make sure that schemes do not "clash" with each other
-        overlapping_keys = set(merged_scheme.keys()) & set(scheme_dict.keys())
-        if overlapping_keys:
-            raise ValueError(
-                f"The module: {name} is being modified by two clashing "
-                f"quantization schemes, that jointly try to override "
-                f"properties: {overlapping_keys}. Fix the quantization config "
-                "so that it is not ambiguous."
-            )
-        merged_scheme.update(scheme_dict)
-
-    merged_scheme.update(targets=[name])
-
-    return QuantizationScheme(**merged_scheme)
+    # return the first scheme (the prioritized one,
+    # since the order of target_to_scheme matters)
+    return target_to_scheme[targets[0]]
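A hedged sketch (not part of the diff) of a config that exercises the new `_apply_kv_cache_scheme` path: any config with a non-null `kv_cache_scheme` now initializes a `QuantizedKVCache` on every attention module before the per-layer schemes are applied. Field values below are illustrative assumptions.

```python
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
    apply_quantization_config,
)

config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(num_bits=8, type="float", symmetric=True),
        )
    },
    # symmetric kv cache scheme; asymmetric schemes hit the vLLM warning above
    kv_cache_scheme=QuantizationArgs(num_bits=8, type="float", symmetric=True),
)
apply_quantization_config(model, config)  # `model` is an already-loaded torch model
```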