keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +16 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +6 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +16 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +12 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/dtypes.py +6 -12
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +38 -20
- keras/src/backend/jax/core.py +126 -78
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/layer.py +3 -1
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +511 -29
- keras/src/backend/jax/numpy.py +109 -23
- keras/src/backend/jax/optimizer.py +3 -2
- keras/src/backend/jax/trainer.py +18 -3
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +313 -2
- keras/src/backend/numpy/numpy.py +97 -8
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +6 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +1369 -195
- keras/src/backend/openvino/random.py +7 -14
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +351 -56
- keras/src/backend/tensorflow/trainer.py +6 -2
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +109 -9
- keras/src/backend/torch/trainer.py +8 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/callback_list.py +45 -11
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +4 -0
- keras/src/dtype_policies/dtype_policy.py +180 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/onnx.py +6 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/attention.py +1 -1
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +406 -102
- keras/src/layers/core/einsum_dense.py +521 -116
- keras/src/layers/core/embedding.py +257 -99
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +50 -15
- keras/src/layers/merging/concatenate.py +6 -5
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +16 -1
- keras/src/layers/preprocessing/string_lookup.py +26 -28
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/gru.py +1 -1
- keras/src/layers/rnn/lstm.py +2 -2
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/layers/rnn/simple_rnn.py +1 -1
- keras/src/legacy/preprocessing/image.py +4 -1
- keras/src/legacy/preprocessing/sequence.py +20 -12
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +195 -44
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +268 -2
- keras/src/ops/numpy.py +701 -44
- keras/src/ops/operation.py +90 -29
- keras/src/ops/operation_utils.py +2 -0
- keras/src/optimizers/adafactor.py +29 -10
- keras/src/optimizers/base_optimizer.py +22 -3
- keras/src/optimizers/loss_scale_optimizer.py +51 -18
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +346 -207
- keras/src/quantizers/gptq_config.py +63 -13
- keras/src/quantizers/gptq_core.py +328 -215
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +407 -38
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/trainers/compile_utils.py +38 -17
- keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
- keras/src/tree/torchtree_impl.py +215 -0
- keras/src/tree/tree_api.py +6 -1
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -12
- keras/src/utils/python_utils.py +5 -0
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +70 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
- keras/src/quantizers/gptq_quant.py +0 -133
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
keras/src/quantizers/gptq_core.py

@@ -1,23 +1,121 @@
-import
+import math
+from contextlib import contextmanager
 
 import numpy as np
 from absl import logging
 
 from keras.src import ops
 from keras.src import utils as keras_utils
+from keras.src.dtype_policies.dtype_policy import GPTQDTypePolicy
+from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
 from keras.src.layers import Dense
 from keras.src.layers import EinsumDense
-from keras.src.layers import Embedding
 from keras.src.quantizers.gptq import GPTQ
-from keras.src.quantizers.
+from keras.src.quantizers.gptq_config import GPTQConfig
+from keras.src.quantizers.utils import should_quantize_layer
 
 
-
+@contextmanager
+def stream_hessians(layers_map, gptq_objects):
     """
-
+    Temporarily monkey-patch each target layer's `call` method so
+    that input activations are streamed into the GPTQ instance's
+    running Hessian estimate at capture time.
+
+    On `__enter__`: For every (name, layer) in `layers_map`, replaces
+    `layer.call` with a wrapper that:
+        1) extracts the layer input from `*args`/`**kwargs`,
+        2) reshapes it to 2D `[-1, rows]` where
+           `rows = gptq_objects[name].rows`,
+        3) calls `gptq_objects[name].update_hessian_with_batch(x2d)`,
+        4) delegates to the original `layer.call` and returns its
+           output.
+
+    On `__exit__`: All original `layer.call` methods are restored even if an
+    exception occurs.
+
+    * Space complexity: O(d**2) per layer (for the Hessian).
+    * No weights are modified; only GPTQ statistics are updated.
+
+    Args:
+        layers_map: Dict[str, Layer]. Mapping from logical layer names to
+            the Keras layers that should be patched during calibration. Keys must
+            match `gptq_objects`.
+        gptq_objects: Dict[str, GPTQ]. Mapping from names to GPTQ instances.
+
+    Yields:
+        None: The patched state is active only within the `with` block. After
+            exit, all layers are unpatched and safe to use normally.
+
+    Example:
+    ```python
+    >>> with stream_hessians(layers_map, gptq_objects):
+    ...     for sample in calibration_inputs:
+    ...         if len(sample.shape) == 2:
+    ...             sample = ops.expand_dims(sample, 0)
+    ...         _ = block(sample)  # hooks update Hessians on-the-fly
+    >>> # <- original layer.call methods restored here
+    ```
     """
-
+    original_calls = {}
+
+    def create_hook(name, original_call_func):
+        def hook(*args, **kwargs):
+            inp = args[0] if args else kwargs["inputs"]
+            # Explicitly reshape the input tensor to be 2D, with the
+            # second dimension matching the number of input features
+            # expected by the layer's kernel.
+            # This correctly handles inputs of any dimensionality
+            # (e.g., 3D or 4D).
+            num_features = gptq_objects[name].rows
+            input_2d = ops.reshape(inp, (-1, num_features))
+            gptq_objects[name].update_hessian_with_batch(input_2d)
+            return original_call_func(*args, **kwargs)
+
+        return hook
+
+    try:
+        for name, layer in layers_map.items():
+            original_calls[name] = layer.call
+            layer.call = create_hook(name, layer.call)
+        yield
+    finally:
+        for name, layer in layers_map.items():
+            layer.call = original_calls[name]
+
+
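The hook above streams activations into `update_hessian_with_batch`, whose implementation lives in `keras/src/quantizers/gptq.py` and is not part of this hunk. As a rough mental model only, a GPTQ-style running Hessian estimate might look like the following; the class name and the exact update rule here are assumptions, not the package's code:

```python
# Illustrative sketch only: a standard GPTQ-style running estimate of
# H ~= 2 * E[x x^T], roughly what the stream_hessians hook feeds into.
# The actual Keras implementation may differ.
import numpy as np


class RunningHessian:  # hypothetical stand-in for the GPTQ object
    def __init__(self, rows):
        self.rows = rows  # number of input features of the layer
        self.n_samples = 0  # activation rows seen so far
        self.hessian = np.zeros((rows, rows))  # running 2 * E[x x^T]

    def update_hessian_with_batch(self, x2d):
        # x2d: (batch, rows) activations captured by the patched `call`.
        batch = x2d.shape[0]
        total = self.n_samples + batch
        # Rescale the old estimate, then add this batch's contribution.
        self.hessian *= self.n_samples / total
        self.hessian += (2.0 / total) * (x2d.T @ x2d)
        self.n_samples = total


h = RunningHessian(rows=4)
h.update_hessian_with_batch(np.random.default_rng(0).normal(size=(8, 4)))
print(h.hessian.shape)  # (4, 4)
```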
87
|
+
def get_dataloader(
|
|
88
|
+
tokenizer,
|
|
89
|
+
sequence_length,
|
|
90
|
+
dataset,
|
|
91
|
+
num_samples=128,
|
|
92
|
+
*,
|
|
93
|
+
strategy="strided",
|
|
94
|
+
seed=42,
|
|
95
|
+
stride=None,
|
|
96
|
+
eos_id=None,
|
|
97
|
+
):
|
|
98
|
+
"""
|
|
99
|
+
Prepares and chunks the calibration dataloader, repeating short datasets.
|
|
100
|
+
All processing happens on the CPU.
|
|
20
101
|
|
|
102
|
+
Args:
|
|
103
|
+
tokenizer: The tokenizer to use for text splitting.
|
|
104
|
+
sequence_length: The length of each input sequence.
|
|
105
|
+
dataset: The dataset to sample from.
|
|
106
|
+
num_samples: The number of samples to generate.
|
|
107
|
+
strategy: The sampling strategy to use. Possible values are
|
|
108
|
+
1. "strided": Samples are taken at regular intervals.
|
|
109
|
+
2. "linspace": Samples are taken at evenly spaced intervals.
|
|
110
|
+
3. "random": Samples are taken at random positions.
|
|
111
|
+
seed: The random seed for reproducibility. Used only if
|
|
112
|
+
strategy="random"
|
|
113
|
+
stride: The stride length for "strided" sampling.
|
|
114
|
+
eos_id: The end-of-sequence token ID.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
np.ndarray of shape (num_samples, 1, sequence_length), dtype int32.
|
|
118
|
+
"""
|
|
21
119
|
if not hasattr(dataset, "__iter__") or isinstance(dataset, (str, bytes)):
|
|
22
120
|
raise TypeError(
|
|
23
121
|
"The `dataset` argument must be an iterable (e.g., a list of "
|
|
@@ -27,267 +125,184 @@ def get_dataloader(tokenizer, sequence_length, dataset, num_samples=128):
|
|
|
27
125
|
)
|
|
28
126
|
|
|
29
127
|
dataset_list = list(dataset)
|
|
30
|
-
|
|
31
128
|
if not dataset_list:
|
|
32
129
|
raise ValueError("Provided dataset is empty.")
|
|
33
130
|
|
|
131
|
+
pieces = []
|
|
34
132
|
if isinstance(dataset_list[0], str):
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
133
|
+
for i, s in enumerate(dataset_list):
|
|
134
|
+
toks = ops.convert_to_numpy(tokenizer.tokenize(s)).reshape(-1)
|
|
135
|
+
pieces.append(toks)
|
|
136
|
+
# avoid windows that span document boundaries
|
|
137
|
+
if eos_id is not None and i < len(dataset_list) - 1:
|
|
138
|
+
pieces.append(np.array([eos_id], dtype=np.int32))
|
|
38
139
|
else:
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
140
|
+
for s in dataset_list:
|
|
141
|
+
toks = ops.convert_to_numpy(s).reshape(-1)
|
|
142
|
+
pieces.append(toks.astype(np.int32, copy=False))
|
|
143
|
+
|
|
144
|
+
all_tokens = (
|
|
145
|
+
pieces[0].astype(np.int32, copy=False)
|
|
146
|
+
if len(pieces) == 1
|
|
147
|
+
else np.concatenate(pieces, axis=0).astype(np.int32, copy=False)
|
|
148
|
+
)
|
|
45
149
|
|
|
46
|
-
# Repeat data if it's too short
|
|
47
150
|
required_tokens = num_samples * sequence_length
|
|
48
|
-
if
|
|
49
|
-
|
|
50
|
-
f"Warning: Dataset is too short ({len(all_tokens)} tokens)."
|
|
51
|
-
" Repeating data to generate {num_samples} samples."
|
|
52
|
-
)
|
|
53
|
-
repeats = -(-required_tokens // len(all_tokens)) # Ceiling division
|
|
151
|
+
if all_tokens.size < required_tokens:
|
|
152
|
+
repeats = math.ceil(required_tokens / max(1, all_tokens.size))
|
|
54
153
|
all_tokens = np.tile(all_tokens, repeats)
|
|
55
154
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
end_index = start_index + sequence_length
|
|
63
|
-
sample = all_tokens[start_index:end_index]
|
|
64
|
-
calibration_samples.append(np.reshape(sample, (1, sequence_length)))
|
|
65
|
-
|
|
66
|
-
final_array = np.stack(calibration_samples, axis=0)
|
|
67
|
-
return final_array
|
|
68
|
-
|
|
155
|
+
max_start = all_tokens.size - sequence_length
|
|
156
|
+
if max_start < 0:
|
|
157
|
+
raise ValueError(
|
|
158
|
+
f"Not enough tokens to form one sample of length {sequence_length} "
|
|
159
|
+
f"(have {all_tokens.size})."
|
|
160
|
+
)
|
|
69
161
|
|
|
70
|
-
|
|
71
|
-
""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
162
|
+
# Choose deterministic, well-spread starts by default
|
|
163
|
+
if strategy == "random":
|
|
164
|
+
rng = np.random.default_rng(seed)
|
|
165
|
+
starts = rng.integers(
|
|
166
|
+
0, max_start + 1, size=num_samples, dtype=np.int64
|
|
167
|
+
)
|
|
168
|
+
elif strategy == "linspace":
|
|
169
|
+
# even coverage with no RNG
|
|
170
|
+
starts = np.linspace(0, max_start, num_samples, dtype=np.int64)
|
|
171
|
+
elif strategy == "strided":
|
|
172
|
+
# stride chosen to cover the space roughly uniformly
|
|
173
|
+
if stride is None:
|
|
174
|
+
stride = max(1, (max_start + 1) // num_samples)
|
|
175
|
+
# offset derived deterministically from seed
|
|
176
|
+
offset = (
|
|
177
|
+
(abs(hash(("gptq-calib", seed))) % (max_start + 1))
|
|
178
|
+
if max_start > 0
|
|
179
|
+
else 0
|
|
180
|
+
)
|
|
181
|
+
starts = (offset + np.arange(num_samples, dtype=np.int64) * stride) % (
|
|
182
|
+
max_start + 1
|
|
183
|
+
)
|
|
184
|
+
else:
|
|
185
|
+
raise ValueError(f"Unknown strategy: {strategy}")
|
|
79
186
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
187
|
+
# Gather contiguous windows
|
|
188
|
+
# sliding_window_view avoids building a big index matrix
|
|
189
|
+
windows = np.lib.stride_tricks.sliding_window_view(
|
|
190
|
+
all_tokens, sequence_length
|
|
191
|
+
)
|
|
192
|
+
samples = windows[starts] # (num_samples, sequence_length)
|
|
193
|
+
return samples.astype(np.int32)[:, None, :]
|
|
83
194
|
|
|
84
195
|
|
|
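For intuition about the three sampling strategies, a self-contained sketch that mirrors the start-index selection and windowing logic shown above on a toy token stream; it does not call the Keras helper itself:

```python
# Illustrative only: the "random" / "linspace" / "strided" start selection
# described in the diff, applied to a stand-in token array.
import numpy as np

all_tokens = np.arange(1000, dtype=np.int32)  # stand-in for tokenized text
sequence_length, num_samples, seed = 64, 8, 42
max_start = all_tokens.size - sequence_length

starts_random = np.random.default_rng(seed).integers(
    0, max_start + 1, size=num_samples, dtype=np.int64
)
starts_linspace = np.linspace(0, max_start, num_samples, dtype=np.int64)

stride = max(1, (max_start + 1) // num_samples)
offset = abs(hash(("gptq-calib", seed))) % (max_start + 1)
starts_strided = (
    offset + np.arange(num_samples, dtype=np.int64) * stride
) % (max_start + 1)

# Every strategy ends with the same windowing step.
windows = np.lib.stride_tricks.sliding_window_view(all_tokens, sequence_length)
samples = windows[starts_strided].astype(np.int32)[:, None, :]
print(samples.shape)  # (8, 1, 64)
```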
85
196
|
def find_layers_in_block(block):
|
|
86
197
|
"""
|
|
87
|
-
|
|
88
|
-
|
|
198
|
+
Finds all Dense and EinsumDense layers in a transformer block.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
block: A Keras layer representing a transformer block.
|
|
202
|
+
Returns:
|
|
203
|
+
A dict mapping layer paths to the corresponding Dense or EinsumDense
|
|
89
204
|
"""
|
|
90
205
|
found_layers = {}
|
|
91
|
-
|
|
92
|
-
|
|
206
|
+
for sub_layer in block._flatten_layers():
|
|
207
|
+
if len(list(sub_layer._flatten_layers())) == 1:
|
|
208
|
+
if isinstance(sub_layer, (Dense, EinsumDense)):
|
|
209
|
+
found_layers[sub_layer.path] = sub_layer
|
|
93
210
|
return found_layers
|
|
94
211
|
|
|
95
212
|
|
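A minimal, hypothetical analogue of this selection using only public Keras APIs (the real helper walks sub-layers via the internal `_flatten_layers` traversal and keys the result by `layer.path`):

```python
# Illustrative only: collecting quantizable sub-layers by type, analogous to
# what find_layers_in_block does for a transformer block.
import keras
from keras import layers

block = keras.Sequential(
    [
        layers.Dense(64, name="ffn_up"),
        layers.Dropout(0.1, name="drop"),
        layers.Dense(32, name="ffn_down"),
    ]
)
block.build((None, 16))

found = {
    sub.name: sub
    for sub in block.layers
    if isinstance(sub, (layers.Dense, layers.EinsumDense))
}
print(sorted(found))  # ['ffn_down', 'ffn_up']
```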
-def apply_gptq_layerwise(
-    model,
-    dataloader,
-    num_samples,
-    hessian_damping,
-    group_size,
-    symmetric,
-    activation_order,
-    weight_bits,
-):
+def apply_gptq_layerwise(dataloader, config, structure, filters=None):
     """Applies GPTQ quantization layer-by-layer to a Keras model.
 
-    This function
-
-    structure by first looking for the standard format: a `model.backbone`
-    attribute that contains a `transformer_layers` list.
-
-    If a standard backbone is not found, it falls back to a heuristic for
-    custom models, where it assumes the first `keras.layers.Embedding` layer
-    is the input embedding and any subsequent container layers are the
-    transformer blocks to be quantized.
+    This function uses the provided `structure` to identify pre-quantization
+    layers and sequential blocks.
 
     The core logic operates as follows:
-
-
-    2. It processes the model sequentially, one block at a time. For each
+
+    1. It processes the model sequentially, one block at a time. For each
        block, it uses temporary hooks to capture the input activations of
       each target layer during a forward pass with the calibration data.
-
+    2. These captured activations are used to compute the Hessian matrix for
       each layer's weights.
-
+    3. The GPTQ algorithm is then applied to each layer to find the optimal
       quantized weights that minimize the error introduced.
-
+    4. The output activations from the current block are then used as the
       input for the next block, ensuring that quantization errors are
       accounted for throughout the model.
 
     Args:
-
-
-
-
-
-            calibration.
-        hessian_damping: (float) The percentage of dampening to add to the
-            Hessian diagonal for stabilization during inverse calculation.
-            A value of 0.01 is common.
-        group_size: (int) The size of the groups to use for quantization. A
-            value of 128 means that 128 weights will share the same scaling
-            factor. Use -1 for per-channel quantization.
-        symmetric: (bool) If True, symmetric quantization is used. Otherwise,
-            asymmetric quantization is used.
-        activation_order: (bool) If True, reorders the weight columns based on
-            activation magnitude, which can improve quantization accuracy.
-        weight_bits: (int) The number of bits to use for the quantized weights,
-            e.g., 4 for 4-bit quantization.
+        dataloader: An iterable providing calibration data.
+        config: A GPTQConfiguration object.
+        structure: A dictionary with keys "pre_block_layers" and
+            "sequential_blocks".
+        filters: Optional filters to exclude layers from quantization.
 
     Raises:
         ValueError: If the function cannot automatically find an embedding
             layer or any transformer-like blocks to quantize within the model.
     """
+
+    num_samples = config.num_samples
+
     logging.info("Starting model quantization...")
-    embedding_layer = None
-    transformer_blocks = []
-    if hasattr(model, "backbone"):
-        logging.info("Detected KerasHub model structure.")
-        backbone = model.backbone
-
-        # Add the check for the 'transformer_layers' attribute.
-        if hasattr(backbone, "transformer_layers"):
-            transformer_blocks = backbone.transformer_layers
-        else:
-            # Raise a specific error if the attribute is missing.
-            raise ValueError(
-                "The model's backbone does not have a 'transformer_layers' "
-                "attribute. Please ensure you are using a standard KerasHub "
-                "transformer model."
-            )
-        # Find the embedding layer by checking for common names or by type.
-        if hasattr(backbone, "token_embedding"):
-            embedding_layer = backbone.token_embedding
-        elif hasattr(backbone, "embedding"):
-            embedding_layer = backbone.embedding
-        else:
-            raise ValueError(
-                "Could not automatically find an embedding layer in the model."
-            )
 
-
-
-
-            # The first Embedding layer found is assumed to be the main one.
-            if isinstance(layer, Embedding) and embedding_layer is None:
-                embedding_layer = layer
-            # A "block" is a container-like layer with its own sub-layers
-            # that we can quantize. This is a heuristic that works for the
-            # test.
-            elif hasattr(layer, "_layers") and layer._layers:
-                transformer_blocks.append(layer)
-
-    if embedding_layer is None:
-        raise ValueError(
-            "Could not automatically find an embedding layer in the model."
-        )
+    pre_layers = structure.get("pre_block_layers", [])
+    transformer_blocks = structure.get("sequential_blocks", [])
+
     if not transformer_blocks:
         raise ValueError(
-            "
-            "quantize."
+            "No sequential blocks found in the provided structure to quantize."
        )
 
-    # Initial inputs are the outputs of the
-    inputs = [
-
-
-
+    # Initial inputs are the outputs of the pre-block layers
+    inputs = []
+    for batch in dataloader:
+        batch = ops.convert_to_tensor(batch, dtype="int32")
+        for layer in pre_layers:
+            batch = layer(batch)
+        inputs.append(batch)
+
+    num_samples = min(num_samples, len(inputs))
+
     progbar = keras_utils.Progbar(target=len(transformer_blocks))
 
     for block_idx, block in enumerate(transformer_blocks):
         logging.info(f"Quantizing Block {block_idx}")
         sub_layers_map = find_layers_in_block(block)
 
+        # Filter out layers that are not quantized with GPTQ
+        final_sub_layers_map = {}
+        for name, layer in sub_layers_map.items():
+            if not should_quantize_layer(layer, filters):
+                continue
+
+            final_sub_layers_map[name] = layer
+
+        sub_layers_map = final_sub_layers_map
+
         if not sub_layers_map:
             logging.info(
-                f" No
-                "Skipping."
+                f" No quantizable layers found in block {block_idx}. Skipping."
             )
         else:
             logging.info(f"Found layers: {list(sub_layers_map.keys())}")
             gptq_objects = {
-                name: GPTQ(layer
+                name: GPTQ(layer, config)
+                for name, layer in sub_layers_map.items()
             }
 
-
-            original_calls = {}
-
-            def create_hook(name, original_call_func):
-                """A factory for creating a hook to capture layer inputs."""
-
-                def hook(*args, **kwargs):
-                    if args:
-                        inp = args[0]
-                    else:
-                        inp = kwargs["inputs"]
-                    captured_inputs[name].append(inp)
-                    return original_call_func(*args, **kwargs)
-
-                return hook
-
-            try:
-                for name, layer in sub_layers_map.items():
-                    original_call = layer.call
-                    original_calls[name] = original_call
-                    layer.call = create_hook(name, original_call)
-
-                logging.info(f"Capturing activations for block {block_idx}...")
+            with stream_hessians(sub_layers_map, gptq_objects):
                 for sample_idx in range(num_samples):
                     current_input = inputs[sample_idx]
                     if len(current_input.shape) == 2:
                         current_input = ops.expand_dims(current_input, axis=0)
                     _ = block(current_input)
 
-            finally:
-                for name, layer in sub_layers_map.items():
-                    if name in original_calls:
-                        layer.call = original_calls[name]
-
-            logging.info(f"Building Hessians for block {block_idx}...")
-            for name, gptq_object in gptq_objects.items():
-                layer_inputs = ops.concatenate(captured_inputs[name], axis=0)
-
-                # Explicitly reshape the input tensor to be 2D, with the second
-                # dimension matching the number of input features expected by
-                # the layer's kernel.
-                # This correctly handles inputs of any dimensionality
-                # (e.g., 3D or 4D).
-                num_features = gptq_object.rows
-                input_reshaped = ops.reshape(layer_inputs, (-1, num_features))
-                gptq_object.update_hessian_with_batch(input_reshaped)
-
-            quantizer = GPTQQuantization(
-                weight_bits,
-                per_channel=True,
-                symmetric=symmetric,
-                group_size=group_size,
-            )
             for name, gptq_object in gptq_objects.items():
                 logging.info(f"Quantizing {name}...")
-                gptq_object.
-                gptq_object.quantize_and_correct_block(
-                    hessian_damping=hessian_damping,
-                    group_size=group_size,
-                    activation_order=activation_order,
-                )
+                gptq_object.quantize_and_correct_layer()
                 gptq_object.free()
 
-            del gptq_objects
+            del gptq_objects
 
         if block_idx < len(transformer_blocks) - 1:
             logging.info(f"Generating inputs for block {block_idx + 1}...")
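The rewritten `apply_gptq_layerwise` no longer sniffs `model.backbone`; the caller supplies the layer structure. A sketch of what that `structure` argument could look like, with toy stand-in layers (the key names come from the diff above; everything else is illustrative):

```python
# Illustrative only: the shape of the `structure` argument consumed by the new
# apply_gptq_layerwise. The toy layers are hypothetical stand-ins for a real
# model's token embedding and transformer blocks.
import keras
from keras import layers

embedding = layers.Embedding(input_dim=1000, output_dim=32)
block_0 = keras.Sequential([layers.Dense(64, activation="relu"), layers.Dense(32)])
block_1 = keras.Sequential([layers.Dense(64, activation="relu"), layers.Dense(32)])

structure = {
    # Layers run once over each calibration batch to produce the block inputs.
    "pre_block_layers": [embedding],
    # Blocks quantized one after another, each fed the previous block's outputs.
    "sequential_blocks": [block_0, block_1],
}
print([len(structure[k]) for k in ("pre_block_layers", "sequential_blocks")])  # [1, 2]
```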
@@ -304,32 +319,130 @@ def apply_gptq_layerwise(
     logging.info("Quantization process complete.")
 
 
-def
+def gptq_quantize(config, quantization_layer_structure, filters=None):
     """
-
+    Quantizes the model using GPTQ.
+
+    Args:
+        config: The GPTQ configuration.
+        quantization_layer_structure: A dictionary describing the model's layer
+            structure for quantization.
+        filters: Optional filters to exclude layers from quantization.
     """
-
+    if config.dataset is None or config.tokenizer is None:
+        raise ValueError(
+            "GPTQ quantization requires a dataset and a tokenizer. "
+            "Please provide them in the `GPTQConfig`."
+        )
 
-
+    if quantization_layer_structure is None:
+        raise ValueError(
+            "For 'gptq' mode, a valid quantization structure must be provided "
+            "either via `config.quantization_layer_structure` or by overriding "
+            "`model.get_quantization_layer_structure(mode)`. The structure "
+            "should be a dictionary with keys 'pre_block_layers' and "
+            "'sequential_blocks'."
+        )
+
+    # Load all data needed from the generator/source in a single call.
     total_samples_to_request = config.num_samples
-
+    dataloader = get_dataloader(
         config.tokenizer,
         config.sequence_length,
         config.dataset,
         num_samples=total_samples_to_request,
     )
 
-    # Split the materialized data. This works because
+    # Split the materialized data. This works because dataloader
     # is now a NumPy array, which can be sliced and reused.
-    calibration_dataloader =
+    calibration_dataloader = dataloader[: config.num_samples]
 
     apply_gptq_layerwise(
-
-
-
-
-        config.group_size,
-        config.symmetric,
-        config.activation_order,
-        config.weight_bits,
+        calibration_dataloader,
+        config,
+        quantization_layer_structure,
+        filters=filters,
     )
+
+
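A small sketch of the preconditions `gptq_quantize` enforces, exercised with a stand-in config object rather than the real `GPTQConfig` (whose definition lives in `keras/src/quantizers/gptq_config.py` and is not shown in this hunk):

```python
# Illustrative only: the attributes gptq_quantize reads from its config and the
# two validation branches above, using a SimpleNamespace stand-in.
from types import SimpleNamespace

config = SimpleNamespace(
    dataset=["calibration text one", "calibration text two"],
    tokenizer=None,  # deliberately missing, to trip the first check
    sequence_length=128,
    num_samples=32,
)
structure = None  # no layer structure supplied either

if config.dataset is None or config.tokenizer is None:
    print("rejected: GPTQ needs both a dataset and a tokenizer in the config")
elif structure is None:
    print("rejected: a dict with 'pre_block_layers' and 'sequential_blocks' is required")
```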
+def get_group_size_for_layer(layer, config):
+    """Determine the group size for GPTQ quantization.
+
+    The group size can be specified either through the `config` argument
+    or through the `dtype_policy` if it is of type `GPTQDTypePolicy`.
+
+    The config argument is usually available when quantizing the layer
+    via the `quantize` method. If the layer was deserialized from a
+    saved model, the group size should be specified in the `dtype_policy`.
+
+    Args:
+        config: An optional configuration object that may contain the
+            `group_size` attribute.
+    Returns:
+        int. The determined group size for GPTQ quantization.
+    Raises:
+        ValueError: If the group size is not specified in either the
+            `config` or the `dtype_policy`.
+    """
+    if config and isinstance(config, GPTQConfig):
+        return config.group_size
+    elif isinstance(layer.dtype_policy, GPTQDTypePolicy):
+        return layer.dtype_policy.group_size
+    elif isinstance(layer.dtype_policy, DTypePolicyMap):
+        policy = layer.dtype_policy[layer.path]
+        if not isinstance(policy, GPTQDTypePolicy):
+            # This should never happen based on how we set the
+            # quantization mode, but we check just in case.
+            raise ValueError(
+                "Expected a `dtype_policy` of type `GPTQDTypePolicy`."
+                f"Got: {type(policy)}"
+            )
+        return policy.group_size
+    else:
+        raise ValueError(
+            "For GPTQ quantization, the group_size must be specified"
+            "either through a `dtype_policy` of type "
+            "`GPTQDTypePolicy` or the `config` argument."
+        )
+
+
+def get_weight_bits_for_layer(layer, config):
+    """Determine the number of weight bits for GPTQ quantization.
+
+    The number of weight bits can be specified either through the `config`
+    argument or through the `dtype_policy` if it is of type
+    `GPTQDTypePolicy`.
+
+    The config argument is usually available when quantizing the layer
+    via the `quantize` method. If the layer was deserialized from a
+    saved model, the weight bits should be specified in the `dtype_policy`.
+
+    Args:
+        config: An optional configuration object that may contain the
+            `weight_bits` attribute.
+    Returns:
+        int. The determined number of weight bits for GPTQ quantization.
+    Raises:
+        ValueError: If the weight bits is not specified in either the
+            `config` or the `dtype_policy`.
+    """
+    if config and isinstance(config, GPTQConfig):
+        return config.weight_bits
+    elif isinstance(layer.dtype_policy, GPTQDTypePolicy):
+        return layer.dtype_policy.weight_bits
+    elif isinstance(layer.dtype_policy, DTypePolicyMap):
+        policy = layer.dtype_policy[layer.path]
+        if not isinstance(policy, GPTQDTypePolicy):
+            # This should never happen based on how we set the
+            # quantization mode, but we check just in case.
+            raise ValueError(
+                "Expected a `dtype_policy` of type `GPTQDTypePolicy`."
+                f"Got: {type(policy)}"
+            )
+        return policy.weight_bits
+    else:
+        raise ValueError(
+            "For GPTQ quantization, the weight_bits must be specified"
+            "either through a `dtype_policy` of type "
+            "`GPTQDTypePolicy` or the `config` argument."
+        )