keras-nightly 3.12.0.dev2025082103__py3-none-any.whl → 3.12.0.dev2025082303__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/_tf_keras/keras/ops/__init__.py +1 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +1 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/ops/__init__.py +1 -0
- keras/ops/numpy/__init__.py +1 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/applications/convnext.py +20 -20
- keras/src/applications/densenet.py +21 -21
- keras/src/applications/efficientnet.py +16 -16
- keras/src/applications/efficientnet_v2.py +28 -28
- keras/src/applications/inception_resnet_v2.py +7 -7
- keras/src/applications/inception_v3.py +5 -5
- keras/src/applications/mobilenet_v2.py +13 -20
- keras/src/applications/mobilenet_v3.py +15 -15
- keras/src/applications/nasnet.py +7 -8
- keras/src/applications/resnet.py +32 -32
- keras/src/applications/xception.py +10 -10
- keras/src/backend/common/dtypes.py +8 -3
- keras/src/backend/common/variables.py +3 -1
- keras/src/backend/jax/export.py +1 -1
- keras/src/backend/jax/numpy.py +6 -0
- keras/src/backend/jax/trainer.py +1 -1
- keras/src/backend/numpy/numpy.py +28 -0
- keras/src/backend/openvino/numpy.py +5 -1
- keras/src/backend/tensorflow/numpy.py +22 -0
- keras/src/backend/tensorflow/trainer.py +19 -1
- keras/src/backend/torch/core.py +6 -9
- keras/src/backend/torch/nn.py +1 -2
- keras/src/backend/torch/numpy.py +16 -0
- keras/src/backend/torch/trainer.py +1 -1
- keras/src/callbacks/backup_and_restore.py +2 -2
- keras/src/callbacks/csv_logger.py +1 -1
- keras/src/callbacks/model_checkpoint.py +1 -1
- keras/src/callbacks/tensorboard.py +6 -6
- keras/src/constraints/constraints.py +9 -7
- keras/src/datasets/boston_housing.py +1 -1
- keras/src/datasets/california_housing.py +1 -1
- keras/src/datasets/cifar10.py +1 -1
- keras/src/datasets/cifar100.py +2 -2
- keras/src/datasets/imdb.py +2 -2
- keras/src/datasets/mnist.py +1 -1
- keras/src/datasets/reuters.py +2 -2
- keras/src/dtype_policies/dtype_policy.py +1 -1
- keras/src/dtype_policies/dtype_policy_map.py +1 -1
- keras/src/export/tf2onnx_lib.py +1 -3
- keras/src/initializers/constant_initializers.py +9 -5
- keras/src/layers/input_spec.py +6 -6
- keras/src/layers/layer.py +1 -1
- keras/src/layers/preprocessing/category_encoding.py +3 -3
- keras/src/layers/preprocessing/data_layer.py +159 -0
- keras/src/layers/preprocessing/discretization.py +3 -3
- keras/src/layers/preprocessing/feature_space.py +4 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +7 -4
- keras/src/layers/preprocessing/image_preprocessing/auto_contrast.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/base_image_preprocessing_layer.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/center_crop.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/cut_mix.py +6 -3
- keras/src/layers/preprocessing/image_preprocessing/equalization.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/mix_up.py +7 -4
- keras/src/layers/preprocessing/image_preprocessing/rand_augment.py +3 -1
- keras/src/layers/preprocessing/image_preprocessing/random_brightness.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/random_color_degeneration.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_color_jitter.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/random_crop.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/random_elastic_transform.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_erasing.py +6 -3
- keras/src/layers/preprocessing/image_preprocessing/random_flip.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/random_gaussian_blur.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_grayscale.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/random_hue.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_invert.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_perspective.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_posterization.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_rotation.py +1 -1
- keras/src/layers/preprocessing/image_preprocessing/random_saturation.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_sharpness.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_shear.py +3 -0
- keras/src/layers/preprocessing/image_preprocessing/random_translation.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/random_zoom.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/solarization.py +3 -0
- keras/src/layers/preprocessing/mel_spectrogram.py +29 -25
- keras/src/layers/preprocessing/normalization.py +5 -2
- keras/src/layers/preprocessing/rescaling.py +3 -3
- keras/src/layers/rnn/bidirectional.py +4 -4
- keras/src/legacy/backend.py +9 -23
- keras/src/legacy/preprocessing/image.py +11 -22
- keras/src/legacy/preprocessing/text.py +1 -1
- keras/src/models/functional.py +2 -2
- keras/src/models/model.py +21 -3
- keras/src/ops/function.py +1 -1
- keras/src/ops/numpy.py +49 -5
- keras/src/ops/operation.py +3 -2
- keras/src/optimizers/base_optimizer.py +3 -4
- keras/src/optimizers/schedules/learning_rate_schedule.py +16 -9
- keras/src/quantizers/gptq.py +350 -0
- keras/src/quantizers/gptq_config.py +169 -0
- keras/src/quantizers/gptq_core.py +335 -0
- keras/src/quantizers/gptq_quant.py +133 -0
- keras/src/saving/file_editor.py +22 -20
- keras/src/saving/object_registration.py +1 -1
- keras/src/saving/saving_lib.py +4 -4
- keras/src/saving/serialization_lib.py +3 -5
- keras/src/trainers/compile_utils.py +1 -1
- keras/src/trainers/data_adapters/array_data_adapter.py +9 -3
- keras/src/trainers/data_adapters/data_adapter_utils.py +15 -5
- keras/src/trainers/data_adapters/generator_data_adapter.py +2 -0
- keras/src/trainers/data_adapters/grain_dataset_adapter.py +8 -2
- keras/src/trainers/data_adapters/tf_dataset_adapter.py +4 -2
- keras/src/trainers/data_adapters/torch_data_loader_adapter.py +3 -1
- keras/src/tree/dmtree_impl.py +19 -3
- keras/src/tree/optree_impl.py +3 -3
- keras/src/tree/tree_api.py +5 -2
- keras/src/utils/file_utils.py +13 -5
- keras/src/utils/io_utils.py +1 -1
- keras/src/utils/model_visualization.py +1 -1
- keras/src/utils/progbar.py +5 -5
- keras/src/utils/summary_utils.py +4 -4
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025082103.dist-info → keras_nightly-3.12.0.dev2025082303.dist-info}/METADATA +1 -1
- {keras_nightly-3.12.0.dev2025082103.dist-info → keras_nightly-3.12.0.dev2025082303.dist-info}/RECORD +125 -121
- keras/src/layers/preprocessing/tf_data_layer.py +0 -78
- {keras_nightly-3.12.0.dev2025082103.dist-info → keras_nightly-3.12.0.dev2025082303.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025082103.dist-info → keras_nightly-3.12.0.dev2025082303.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
+import random
+
+import numpy as np
+from absl import logging
+
+from keras.src import ops
+from keras.src import utils as keras_utils
+from keras.src.layers import Dense
+from keras.src.layers import EinsumDense
+from keras.src.layers import Embedding
+from keras.src.quantizers.gptq import GPTQ
+from keras.src.quantizers.gptq_quant import GPTQQuantization
+
+
+def get_dataloader(tokenizer, sequence_length, dataset, num_samples=128):
+    """
+    Prepares and chunks the calibration dataloader, repeating short datasets.
+    """
+    all_tokens = []
+
+    if not hasattr(dataset, "__iter__") or isinstance(dataset, (str, bytes)):
+        raise TypeError(
+            "The `dataset` argument must be an iterable (e.g., a list of "
+            "strings, a generator, or a NumPy array). Got type: "
+            f"{type(dataset).__name__}. Please pass the loaded dataset "
+            "directly."
+        )
+
+    dataset_list = list(dataset)
+
+    if not dataset_list:
+        raise ValueError("Provided dataset is empty.")
+
+    if isinstance(dataset_list[0], str):
+        logging.info("(Dataset contains strings, tokenizing now...)")
+        full_text = "\n\n".join(dataset_list)
+        all_tokens = tokenizer.tokenize(full_text)
+    else:
+        logging.info("(Dataset is pre-tokenized, concatenating...)")
+        all_tokens = np.concatenate(
+            [ops.convert_to_numpy(s).reshape(-1) for s in dataset_list], axis=0
+        )
+
+    all_tokens = np.array(all_tokens, dtype=np.int32)
+
+    # Repeat data if it's too short
+    required_tokens = num_samples * sequence_length
+    if len(all_tokens) < required_tokens:
+        logging.info(
+            f"Warning: Dataset is too short ({len(all_tokens)} tokens)."
+            " Repeating data to generate {num_samples} samples."
+        )
+        repeats = -(-required_tokens // len(all_tokens)) # Ceiling division
+        all_tokens = np.tile(all_tokens, repeats)
+
+    # Chunk the token list into samples
+
+    calibration_samples = []
+    for _ in range(num_samples):
+        # Generate a random starting index
+        start_index = random.randint(0, len(all_tokens) - sequence_length - 1)
+        end_index = start_index + sequence_length
+        sample = all_tokens[start_index:end_index]
+        calibration_samples.append(np.reshape(sample, (1, sequence_length)))
+
+    final_array = np.stack(calibration_samples, axis=0)
+    return final_array
+
+
+def _find_layers_recursive(layer, prefix, found_layers):
+    """
+    Recursively search for Dense and EinsumDense layers and record them.
+    """
+    for sub_layer in layer._layers:
+        # Construct a unique name for the layer based on its hierarchy
+        layer_name = f"{prefix}.{sub_layer.name}"
+        if isinstance(sub_layer, (Dense, EinsumDense)):
+            found_layers[layer_name] = sub_layer
+
+        # Recurse into nested layers that are not the target types
+        elif hasattr(sub_layer, "_layers") and sub_layer._layers:
+            _find_layers_recursive(sub_layer, layer_name, found_layers)
+
+
+def find_layers_in_block(block):
+    """
+    A pluggable, generic function to find all Dense and EinsumDense layers
+    within any transformer block by using a recursive search.
+    """
+    found_layers = {}
+    # Start the recursive search from the block itself
+    _find_layers_recursive(block, "block", found_layers)
+    return found_layers
+
+
+def apply_gptq_layerwise(
+    model,
+    dataloader,
+    num_samples,
+    hessian_damping,
+    group_size,
+    symmetric,
+    activation_order,
+    weight_bits,
+):
+    """Applies GPTQ quantization layer-by-layer to a Keras model.
+
+    This function is designed to work with common transformer architectures,
+    like those provided by KerasHub. It automatically discovers the model's
+    structure by first looking for the standard format: a `model.backbone`
+    attribute that contains a `transformer_layers` list.
+
+    If a standard backbone is not found, it falls back to a heuristic for
+    custom models, where it assumes the first `keras.layers.Embedding` layer
+    is the input embedding and any subsequent container layers are the
+    transformer blocks to be quantized.
+
+    The core logic operates as follows:
+    1. It automatically detects the model's structure, identifying the main
+       embedding layer and a sequence of transformer blocks.
+    2. It processes the model sequentially, one block at a time. For each
+       block, it uses temporary hooks to capture the input activations of
+       each target layer during a forward pass with the calibration data.
+    3. These captured activations are used to compute the Hessian matrix for
+       each layer's weights.
+    4. The GPTQ algorithm is then applied to each layer to find the optimal
+       quantized weights that minimize the error introduced.
+    5. The output activations from the current block are then used as the
+       input for the next block, ensuring that quantization errors are
+       accounted for throughout the model.
+
+    Args:
+        model: The Keras model instance to be quantized. The function will
+            attempt to automatically discover its structure.
+        dataloader: An iterable providing calibration data. Each item should
+            be a batch of token IDs suitable for the model's embedding layer.
+        num_samples: (int) The number of samples from the dataloader to use for
+            calibration.
+        hessian_damping: (float) The percentage of dampening to add to the
+            Hessian diagonal for stabilization during inverse calculation.
+            A value of 0.01 is common.
+        group_size: (int) The size of the groups to use for quantization. A
+            value of 128 means that 128 weights will share the same scaling
+            factor. Use -1 for per-channel quantization.
+        symmetric: (bool) If True, symmetric quantization is used. Otherwise,
+            asymmetric quantization is used.
+        activation_order: (bool) If True, reorders the weight columns based on
+            activation magnitude, which can improve quantization accuracy.
+        weight_bits: (int) The number of bits to use for the quantized weights,
+            e.g., 4 for 4-bit quantization.
+
+    Raises:
+        ValueError: If the function cannot automatically find an embedding
+            layer or any transformer-like blocks to quantize within the model.
+    """
+    logging.info("Starting model quantization...")
+    embedding_layer = None
+    transformer_blocks = []
+    if hasattr(model, "backbone"):
+        logging.info("Detected KerasHub model structure.")
+        backbone = model.backbone
+
+        # Add the check for the 'transformer_layers' attribute.
+        if hasattr(backbone, "transformer_layers"):
+            transformer_blocks = backbone.transformer_layers
+        else:
+            # Raise a specific error if the attribute is missing.
+            raise ValueError(
+                "The model's backbone does not have a 'transformer_layers' "
+                "attribute. Please ensure you are using a standard KerasHub "
+                "transformer model."
+            )
+        # Find the embedding layer by checking for common names or by type.
+        if hasattr(backbone, "token_embedding"):
+            embedding_layer = backbone.token_embedding
+        elif hasattr(backbone, "embedding"):
+            embedding_layer = backbone.embedding
+        else:
+            raise ValueError(
+                "Could not automatically find an embedding layer in the model."
+            )
+
+    else:
+        logging.info("Detected custom model structure.")
+        for layer in model.layers:
+            # The first Embedding layer found is assumed to be the main one.
+            if isinstance(layer, Embedding) and embedding_layer is None:
+                embedding_layer = layer
+            # A "block" is a container-like layer with its own sub-layers
+            # that we can quantize. This is a heuristic that works for the
+            # test.
+            elif hasattr(layer, "_layers") and layer._layers:
+                transformer_blocks.append(layer)
+
+    if embedding_layer is None:
+        raise ValueError(
+            "Could not automatically find an embedding layer in the model."
+        )
+    if not transformer_blocks:
+        raise ValueError(
+            "Could not automatically find any transformer-like blocks to "
+            "quantize."
+        )
+
+    # Initial inputs are the outputs of the token embedding layer
+    inputs = [
+        embedding_layer(ops.convert_to_tensor(batch, dtype="int32"))
+        for batch in dataloader
+    ]
+    progbar = keras_utils.Progbar(target=len(transformer_blocks))
+
+    for block_idx, block in enumerate(transformer_blocks):
+        logging.info(f"Quantizing Block {block_idx}")
+        sub_layers_map = find_layers_in_block(block)
+
+        if not sub_layers_map:
+            logging.info(
+                f" No Dense or EinsumDense layers found in block {block_idx}. "
+                "Skipping."
+            )
+        else:
+            logging.info(f"Found layers: {list(sub_layers_map.keys())}")
+            gptq_objects = {
+                name: GPTQ(layer) for name, layer in sub_layers_map.items()
+            }
+
+            captured_inputs = {name: [] for name in sub_layers_map.keys()}
+            original_calls = {}
+
+            def create_hook(name, original_call_func):
+                """A factory for creating a hook to capture layer inputs."""
+
+                def hook(*args, **kwargs):
+                    if args:
+                        inp = args[0]
+                    else:
+                        inp = kwargs["inputs"]
+                    captured_inputs[name].append(inp)
+                    return original_call_func(*args, **kwargs)
+
+                return hook
+
+            try:
+                for name, layer in sub_layers_map.items():
+                    original_call = layer.call
+                    original_calls[name] = original_call
+                    layer.call = create_hook(name, original_call)
+
+                logging.info(f"Capturing activations for block {block_idx}...")
+                for sample_idx in range(num_samples):
+                    current_input = inputs[sample_idx]
+                    if len(current_input.shape) == 2:
+                        current_input = ops.expand_dims(current_input, axis=0)
+                    _ = block(current_input)
+
+            finally:
+                for name, layer in sub_layers_map.items():
+                    if name in original_calls:
+                        layer.call = original_calls[name]
+
+            logging.info(f"Building Hessians for block {block_idx}...")
+            for name, gptq_object in gptq_objects.items():
+                layer_inputs = ops.concatenate(captured_inputs[name], axis=0)
+
+                # Explicitly reshape the input tensor to be 2D, with the second
+                # dimension matching the number of input features expected by
+                # the layer's kernel.
+                # This correctly handles inputs of any dimensionality
+                # (e.g., 3D or 4D).
+                num_features = gptq_object.rows
+                input_reshaped = ops.reshape(layer_inputs, (-1, num_features))
+                gptq_object.update_hessian_with_batch(input_reshaped)
+
+            quantizer = GPTQQuantization(
+                weight_bits,
+                per_channel=True,
+                symmetric=symmetric,
+                group_size=group_size,
+            )
+            for name, gptq_object in gptq_objects.items():
+                logging.info(f"Quantizing {name}...")
+                gptq_object.quantizer = quantizer
+                gptq_object.quantize_and_correct_block(
+                    hessian_damping=hessian_damping,
+                    group_size=group_size,
+                    activation_order=activation_order,
+                )
+                gptq_object.free()
+
+            del gptq_objects, captured_inputs, original_calls
+
+        if block_idx < len(transformer_blocks) - 1:
+            logging.info(f"Generating inputs for block {block_idx + 1}...")
+            next_block_inputs = []
+            for sample_idx in range(num_samples):
+                current_input = inputs[sample_idx]
+                if len(current_input.shape) == 2:
+                    current_input = ops.expand_dims(current_input, axis=0)
+                output = block(current_input)[0]
+                next_block_inputs.append(output)
+            inputs = next_block_inputs
+        progbar.update(current=block_idx + 1)
+
+    logging.info("Quantization process complete.")
+
+
+def quantize_model(model, config):
+    """
+    Top-level function to quantize a Keras model using GPTQ.
+    """
+    logging.info("Starting GPTQ quantization process...")
+
+    # Load ALL data needed from the generator/source in a single call.
+    total_samples_to_request = config.num_samples
+    full_dataloader = get_dataloader(
+        config.tokenizer,
+        config.sequence_length,
+        config.dataset,
+        num_samples=total_samples_to_request,
+    )
+
+    # Split the materialized data. This works because full_dataloader
+    # is now a NumPy array, which can be sliced and reused.
+    calibration_dataloader = full_dataloader[: config.num_samples]
+
+    apply_gptq_layerwise(
+        model,
+        calibration_dataloader, # Use the calibration slice
+        config.num_samples, # Use the configured number of samples
+        config.hessian_damping,
+        config.group_size,
+        config.symmetric,
+        config.activation_order,
+        config.weight_bits,
+    )
keras/src/quantizers/gptq_quant.py
ADDED
@@ -0,0 +1,133 @@
+from keras.src import ops
+
+
+def dequantize(input_tensor, scale, zero, maxq):
+    """The core quantization function."""
+    epsilon = ops.cast(1e-8, dtype=scale.dtype)
+    scale = ops.where(ops.equal(scale, 0), epsilon, scale)
+
+    quantized_tensor = ops.divide(input_tensor, scale)
+    quantized_tensor = ops.round(quantized_tensor)
+    q = ops.add(quantized_tensor, zero)
+    q = ops.clip(q, 0, maxq)
+
+    dequantized_tensor = ops.subtract(q, zero)
+    return ops.multiply(scale, dequantized_tensor)
+
+
+class GPTQQuantization:
+    """A class that handles the quantization of weights using GPTQ method.
+
+    This class provides methods to find quantization parameters (scale and zero)
+    for a given tensor and can be used to quantize weights in a GPTQ context.
+
+    Args:
+        weight_bits: (int) The number of bits to quantize to (e.g., 4).
+        per_channel: (bool) A flag indicating whether quantization is
+            applied per-channel (`True`) or per-tensor (`False`).
+            Defaults to `False`.
+        symmetric: (bool) A flag indicating whether symmetric (`True`) or
+            asymmetric (`False`) quantization is used. Defaults to `False`.
+        group_size: (int) The size of weight groups for quantization. A
+            value of -1 indicates that grouping is not used.
+            Defaults to -1.
+    """
+
+    def __init__(
+        self, weight_bits, per_channel=True, symmetric=False, group_size=-1
+    ):
+        self.weight_bits = weight_bits
+        self.maxq = ops.cast(
+            ops.subtract(ops.power(2, weight_bits), 1), "float32"
+        )
+        self.per_channel = per_channel
+        self.symmetric = symmetric
+        self.group_size = group_size
+
+        # These are now determined later by `find_params`
+        self.scale = None
+        self.zero = None
+
+    def find_params(self, input_tensor, weight=False):
+        """Finds quantization parameters (scale and zero) for a given tensor."""
+
+        if input_tensor is None:
+            raise ValueError("Input tensor 'input_tensor' cannot be None.")
+
+        # For weights, we typically expect at least a 2D tensor.
+        if weight and len(input_tensor.shape) < 2:
+            raise ValueError(
+                f"Input weight tensor 'input_tensor' must have a rank of at "
+                f"least 2, but got rank {len(input_tensor.shape)}."
+            )
+
+        if ops.size(input_tensor) == 0:
+            raise ValueError("Input tensor 'input_tensor' cannot be empty.")
+
+        original_shape = input_tensor.shape
+
+        if self.per_channel:
+            if weight:
+                if self.group_size != -1:
+                    input_reshaped = ops.reshape(
+                        input_tensor, [-1, self.group_size]
+                    )
+                else:
+                    input_reshaped = ops.reshape(
+                        input_tensor, [original_shape[0], -1]
+                    )
+        else: # per-tensor
+            input_reshaped = ops.reshape(input_tensor, [1, -1])
+
+        # Find min/max values
+        min_values = ops.min(input_reshaped, axis=1)
+        max_values = ops.max(input_reshaped, axis=1)
+
+        # Apply symmetric quantization logic if enabled
+        if self.symmetric:
+            max_values = ops.maximum(ops.abs(min_values), max_values)
+            min_values = ops.where(
+                ops.less(min_values, 0), ops.negative(max_values), min_values
+            )
+
+        # Ensure range is not zero to avoid division errors
+        zero_range = ops.equal(min_values, max_values)
+        min_values = ops.where(
+            zero_range, ops.subtract(min_values, 1), min_values
+        )
+        max_values = ops.where(zero_range, ops.add(max_values, 1), max_values)
+
+        # Calculate scale and zero-point
+        self.scale = ops.divide(ops.subtract(max_values, min_values), self.maxq)
+        if self.symmetric:
+            self.zero = ops.full_like(
+                self.scale, ops.divide(ops.add(self.maxq, 1), 2)
+            )
+        else:
+            self.zero = ops.round(
+                ops.divide(ops.negative(min_values), self.scale)
+            )
+
+        # Ensure scale is non-zero
+        self.scale = ops.where(ops.less_equal(self.scale, 0), 1e-8, self.scale)
+
+        if weight:
+            # Per-channel, non-grouped case: simple reshape is correct.
+            if self.per_channel and self.group_size == -1:
+                self.scale = ops.reshape(self.scale, [-1, 1])
+                self.zero = ops.reshape(self.zero, [-1, 1])
+            elif not self.per_channel:
+                num_rows = original_shape[0]
+                self.scale = ops.tile(
+                    ops.reshape(self.scale, (1, 1)), (num_rows, 1)
+                )
+                self.zero = ops.tile(
+                    ops.reshape(self.zero, (1, 1)), (num_rows, 1)
+                )
+        if self.per_channel:
+            self.scale = ops.reshape(self.scale, [-1, 1])
+            self.zero = ops.reshape(self.zero, [-1, 1])
+
+    def ready(self):
+        """Checks if the quantization parameters have been computed."""
+        return self.scale is not None and self.zero is not None
keras/src/saving/file_editor.py
CHANGED
@@ -1,5 +1,6 @@
 import collections
 import json
+import os.path
 import pprint
 import zipfile

@@ -76,7 +77,7 @@ class KerasFileEditor:
         if filepath.endswith(".keras"):
             zf = zipfile.ZipFile(filepath, "r")
             weights_store = H5IOStore(
-                saving_lib._VARS_FNAME
+                f"{saving_lib._VARS_FNAME}.h5",
                 archive=zf,
                 mode="r",
             )
@@ -143,7 +144,7 @@ class KerasFileEditor:
         ):
             base_inner_path = inner_path
             for ref_key, ref_val in ref_spec.items():
-                inner_path = base_inner_path
+                inner_path = f"{base_inner_path}/{ref_key}"
                 if inner_path in checked_paths:
                     continue

@@ -435,7 +436,7 @@ class KerasFileEditor:
                 _save(
                     weights_dict[name],
                     weights_store,
-                    inner_path=inner_path
+                    inner_path=os.path.join(inner_path, name),
                 )
             else:
                 # e.g. name="0", value=HDF5Dataset
@@ -462,7 +463,7 @@ class KerasFileEditor:

         result = collections.OrderedDict()
         for key in data.keys():
-            inner_path = inner_path
+            inner_path = f"{inner_path}/{key}"
             value = data[key]
             if isinstance(value, h5py.Group):
                 if len(value) == 0:
@@ -506,7 +507,7 @@ class KerasFileEditor:
         self, weights_dict, indent=0, is_first=True, prefix="", inner_path=""
     ):
         for idx, (key, value) in enumerate(weights_dict.items()):
-            inner_path = inner_path
+            inner_path = os.path.join(inner_path, key)
            is_last = idx == len(weights_dict) - 1
            if is_first:
                is_first = False
@@ -556,29 +557,30 @@ class KerasFileEditor:
         html = ""
         for key, value in dictionary.items():
             if isinstance(value, dict) and value:
+                weights_html = _generate_html_weights(
+                    value, margin_left + 20, font_size - 1
+                )
                 html += (
                     f'<details style="margin-left: {margin_left}px;">'
-
-
-
-
-
-
-                    )
-                    + "</details>"
+                    '<summary style="'
+                    f"font-size: {font_size}em; "
+                    "font-weight: bold;"
+                    f'">{key}</summary>'
+                    f"{weights_html}"
+                    "</details>"
                 )
             else:
                 html += (
                     f'<details style="margin-left: {margin_left}px;">'
-
-
-
-
+                    f'<summary style="font-size: {font_size}em;">'
+                    f"{key} : shape={value.shape}"
+                    f", dtype={value.dtype}</summary>"
+                    f"<div style="
                     f'"margin-left: {margin_left}px;'
                     f'"margin-top: {margin_left}px;">'
-
-
-
+                    f"{display_weight(value)}"
+                    "</div>"
+                    "</details>"
                 )
         return html

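The path fixes above affect how `KerasFileEditor` assembles the inner paths of nested weight groups inside a `.keras` archive. A minimal sketch, assuming the public `keras.saving.KerasFileEditor` API and its `summary()` method:

    import keras

    # Build and save a tiny model so there is an archive to inspect.
    model = keras.Sequential([keras.Input(shape=(8,)), keras.layers.Dense(4)])
    model.save("tiny_model.keras")

    # Open the saved archive and print its nested weight structure; the per-entry
    # inner paths are the ones the fixes above now join correctly.
    editor = keras.saving.KerasFileEditor("tiny_model.keras")
    editor.summary()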
keras/src/saving/object_registration.py
CHANGED
@@ -140,7 +140,7 @@ def register_keras_serializable(package="Custom", name=None):
     def decorator(arg):
         """Registers a class with the Keras serialization framework."""
         class_name = name if name is not None else arg.__name__
-        registered_name = package
+        registered_name = f"{package}>{class_name}"

         if inspect.isclass(arg) and not hasattr(arg, "get_config"):
             raise ValueError(
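The restored `f"{package}>{class_name}"` key above is the name under which the serialization registry stores custom objects. For reference, standard usage of this API (not part of the diff):

    import keras

    @keras.saving.register_keras_serializable(package="MyPackage")
    class Scaler(keras.layers.Layer):
        def __init__(self, factor=2.0, **kwargs):
            super().__init__(**kwargs)
            self.factor = factor

        def call(self, inputs):
            return inputs * self.factor

        def get_config(self):
            return {**super().get_config(), "factor": self.factor}

    # The decorator registers the class under "MyPackage>Scaler".
    print(keras.saving.get_registered_name(Scaler))                          # MyPackage>Scaler
    print(keras.saving.get_registered_object("MyPackage>Scaler") is Scaler)  # True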
keras/src/saving/saving_lib.py
CHANGED
@@ -46,8 +46,8 @@ except ImportError:
 _CONFIG_FILENAME = "config.json"
 _METADATA_FILENAME = "metadata.json"
 _VARS_FNAME = "model.weights" # Will become e.g. "model.weights.h5"
-_VARS_FNAME_H5 = _VARS_FNAME
-_VARS_FNAME_NPZ = _VARS_FNAME
+_VARS_FNAME_H5 = f"{_VARS_FNAME}.h5"
+_VARS_FNAME_NPZ = f"{_VARS_FNAME}.npz"
 _ASSETS_DIRNAME = "assets"
 _MEMORY_UPPER_BOUND = 0.5 # 50%

@@ -664,7 +664,7 @@ def _write_to_zip_recursively(zipfile_to_save, system_path, zip_path):
 def _name_key(name):
     """Make sure that private attributes are visited last."""
     if name.startswith("_"):
-        return "~"
+        return f"~{name}"
     return name


@@ -1288,7 +1288,7 @@ class ShardedH5IOStore(H5IOStore):
         # If not found, check shard map and switch files.
         weight_map = self.sharding_config["weight_map"]
         filenames = weight_map.get(parsed_path) or weight_map.get(
-            "/
+            f"/{parsed_path}/vars"
         )
         if filenames is not None:
             if not isinstance(filenames, list):
keras/src/saving/serialization_lib.py
CHANGED
@@ -778,7 +778,7 @@ def _retrieve_class_or_fn(
     # module name might not match the package structure
     # (e.g. experimental symbols).
     if module == "keras" or module.startswith("keras."):
-        api_name = module
+        api_name = f"{module}.{name}"

         if api_name in LOADING_APIS:
             raise ValueError(
@@ -796,9 +796,7 @@ def _retrieve_class_or_fn(
     # the corresponding function from the identifying string.
     if obj_type == "function" and module == "builtins":
         for mod in BUILTIN_MODULES:
-            obj = api_export.get_symbol_from_name(
-                "keras." + mod + "." + name
-            )
+            obj = api_export.get_symbol_from_name(f"keras.{mod}.{name}")
             if obj is not None:
                 return obj

@@ -807,7 +805,7 @@ def _retrieve_class_or_fn(
     # i.e. "name" instead of "package>name". This allows recent versions
     # of Keras to reload models saved with 3.6 and lower.
     if ">" not in name:
-        separated_name = ">"
+        separated_name = f">{name}"
         for custom_name, custom_object in custom_objects.items():
             if custom_name.endswith(separated_name):
                 return custom_object
keras/src/trainers/compile_utils.py
CHANGED
@@ -659,7 +659,7 @@ class CompileLoss(losses_module.Loss):
         # Add `Mean` metric to the tracker for each loss.
         if len(self._flat_losses) > 1:
             for _loss in self._flat_losses:
-                name = _loss.name
+                name = f"{_loss.name}_loss"
                 self._tracker.add_to_store(
                     "metrics", metrics_module.Mean(name=name)
                 )
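With more than one loss, the `Mean` trackers created above now carry a `_loss` suffix, which is where the per-output loss keys in training logs come from. A small sketch (the exact keys depend on the output and loss names):

    import numpy as np
    import keras

    inputs = keras.Input(shape=(4,))
    out_a = keras.layers.Dense(1, name="out_a")(inputs)
    out_b = keras.layers.Dense(1, name="out_b")(inputs)
    model = keras.Model(inputs, {"out_a": out_a, "out_b": out_b})

    model.compile(optimizer="sgd", loss={"out_a": "mse", "out_b": "mae"})

    x = np.random.rand(16, 4).astype("float32")
    y = {
        "out_a": np.random.rand(16, 1).astype("float32"),
        "out_b": np.random.rand(16, 1).astype("float32"),
    }
    history = model.fit(x, y, epochs=1, verbose=0)

    # Expected to include per-loss entries such as "out_a_loss" and "out_b_loss"
    # alongside the total "loss".
    print(sorted(history.history.keys()))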
keras/src/trainers/data_adapters/array_data_adapter.py
CHANGED
@@ -76,7 +76,9 @@ class ArrayDataAdapter(DataAdapter):
         inputs = data_adapter_utils.pack_x_y_sample_weight(x, y, sample_weight)

         data_adapter_utils.check_data_cardinality(inputs)
-        num_samples = set(
+        num_samples = set(
+            i.shape[0] for i in tree.flatten(inputs) if i is not None
+        ).pop()
         self._num_samples = num_samples
         self._inputs = inputs

@@ -269,7 +271,9 @@ class ArrayDataAdapter(DataAdapter):
             x = convert_to_tensor(x)
             return x

-        return tree.map_structure(
+        return tree.map_structure(
+            slice_and_convert, self.array, none_is_leaf=False
+        )

     def __len__(self):
         return len(self.array[0])
@@ -337,7 +341,9 @@ class ArrayDataAdapter(DataAdapter):
             slice_indices_and_convert_fn = functools.partial(
                 slice_and_convert_fn, indices=indices
             )
-            yield tree.map_structure(
+            yield tree.map_structure(
+                slice_indices_and_convert_fn, inputs, none_is_leaf=False
+            )

     @property
     def num_batches(self):