PyPI - keras-hub - Versions diffs - 0.25.0.dev0__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl - Mend

keras-hub 0.25.0.dev0__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109)

keras_hub/layers/__init__.py +21 -0
keras_hub/models/__init__.py +27 -0
keras_hub/src/layers/modeling/non_max_supression.py +5 -2
keras_hub/src/layers/modeling/reversible_embedding.py +2 -275
keras_hub/src/layers/modeling/token_and_position_embedding.py +6 -6
keras_hub/src/layers/modeling/transformer_layer_utils.py +9 -9
keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +3 -1
keras_hub/src/layers/preprocessing/multi_segment_packer.py +3 -1
keras_hub/src/models/albert/albert_backbone.py +1 -3
keras_hub/src/models/backbone.py +3 -0
keras_hub/src/models/bart/bart_backbone.py +1 -3
keras_hub/src/models/bert/bert_backbone.py +2 -4
keras_hub/src/models/bloom/bloom_backbone.py +1 -3
keras_hub/src/models/causal_lm.py +2 -2
keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -3
keras_hub/src/models/edrec/edrec_backbone.py +147 -0
keras_hub/src/models/edrec/edrec_layers.py +434 -0
keras_hub/src/models/edrec/edrec_seq2seq_lm.py +273 -0
keras_hub/src/models/electra/electra_backbone.py +1 -3
keras_hub/src/models/f_net/f_net_backbone.py +1 -3
keras_hub/src/models/falcon/falcon_backbone.py +1 -3
keras_hub/src/models/flux/flux_layers.py +3 -3
keras_hub/src/models/flux/flux_maths.py +29 -15
keras_hub/src/models/gemma/gemma_backbone.py +1 -3
keras_hub/src/models/gemma/gemma_causal_lm.py +1 -1
keras_hub/src/models/gemma3/gemma3_attention.py +1 -1
keras_hub/src/models/gemma3/gemma3_backbone.py +70 -8
keras_hub/src/models/gemma3/gemma3_causal_lm.py +16 -1
keras_hub/src/models/gemma3/gemma3_decoder_block.py +23 -3
keras_hub/src/models/gemma3/{gemma3_interleave_embeddings.py → gemma3_layers.py} +101 -0
keras_hub/src/models/gemma3/gemma3_presets.py +79 -7
keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
keras_hub/src/models/gpt2/gpt2_backbone.py +1 -3
keras_hub/src/models/gpt2/gpt2_causal_lm.py +1 -1
keras_hub/src/models/gpt_neo_x/gpt_neo_x_backbone.py +1 -3
keras_hub/src/models/gpt_oss/gpt_oss_backbone.py +1 -3
keras_hub/src/models/llama/llama_backbone.py +1 -3
keras_hub/src/models/masked_lm.py +1 -1
keras_hub/src/models/mistral/mistral_backbone.py +1 -3
keras_hub/src/models/mixtral/mixtral_backbone.py +1 -3
keras_hub/src/models/moonshine/moonshine_backbone.py +1 -3
keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +1 -3
keras_hub/src/models/parseq/parseq_tokenizer.py +3 -1
keras_hub/src/models/phi3/phi3_backbone.py +1 -3
keras_hub/src/models/qwen/qwen_backbone.py +1 -3
keras_hub/src/models/qwen/qwen_presets.py +209 -0
keras_hub/src/models/qwen3/qwen3_backbone.py +1 -3
keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +1 -3
keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +15 -0
keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +1 -3
keras_hub/src/models/roformer_v2/roformer_v2_backbone.py +1 -3
keras_hub/src/models/rqvae/__init__.py +5 -0
keras_hub/src/models/rqvae/rqvae_backbone.py +167 -0
keras_hub/src/models/rqvae/rqvae_layers.py +335 -0
keras_hub/src/models/rwkv7/__init__.py +5 -0
keras_hub/src/models/rwkv7/rwkv7_backbone.py +180 -0
keras_hub/src/models/rwkv7/rwkv7_causal_lm.py +259 -0
keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +214 -0
keras_hub/src/models/rwkv7/rwkv7_layer.py +724 -0
keras_hub/src/models/rwkv7/rwkv7_presets.py +26 -0
keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +495 -0
keras_hub/src/models/sam/sam_backbone.py +5 -1
keras_hub/src/models/sam/sam_prompt_encoder.py +1 -1
keras_hub/src/models/sam3/__init__.py +7 -0
keras_hub/src/models/sam3/roi_align.py +222 -0
keras_hub/src/models/sam3/sam3_detr_decoder.py +641 -0
keras_hub/src/models/sam3/sam3_detr_encoder.py +293 -0
keras_hub/src/models/sam3/sam3_dot_product_scoring.py +120 -0
keras_hub/src/models/sam3/sam3_geometry_encoder.py +517 -0
keras_hub/src/models/sam3/sam3_image_converter.py +10 -0
keras_hub/src/models/sam3/sam3_layers.py +814 -0
keras_hub/src/models/sam3/sam3_mask_decoder.py +374 -0
keras_hub/src/models/sam3/sam3_pc_backbone.py +306 -0
keras_hub/src/models/sam3/sam3_pc_image_segmenter.py +282 -0
keras_hub/src/models/sam3/sam3_pc_image_segmenter_preprocessor.py +336 -0
keras_hub/src/models/sam3/sam3_presets.py +16 -0
keras_hub/src/models/sam3/sam3_text_encoder.py +212 -0
keras_hub/src/models/sam3/sam3_tokenizer.py +65 -0
keras_hub/src/models/sam3/sam3_utils.py +134 -0
keras_hub/src/models/sam3/sam3_vision_encoder.py +738 -0
keras_hub/src/models/segformer/segformer_backbone.py +6 -6
keras_hub/src/models/siglip/siglip_layers.py +1 -3
keras_hub/src/models/smollm3/smollm3_backbone.py +1 -3
keras_hub/src/models/stable_diffusion_3/t5_encoder.py +1 -3
keras_hub/src/models/t5/t5_backbone.py +1 -3
keras_hub/src/models/t5gemma/t5gemma_backbone.py +1 -3
keras_hub/src/models/task.py +1 -1
keras_hub/src/tests/test_case.py +394 -3
keras_hub/src/tokenizers/byte_pair_tokenizer.py +33 -2
keras_hub/src/tokenizers/byte_tokenizer.py +3 -1
keras_hub/src/tokenizers/sentence_piece_tokenizer.py +15 -1
keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +3 -1
keras_hub/src/tokenizers/word_piece_tokenizer.py +15 -1
keras_hub/src/utils/preset_utils.py +1 -1
keras_hub/src/utils/tensor_utils.py +12 -0
keras_hub/src/utils/transformers/convert_gemma3.py +68 -22
keras_hub/src/utils/transformers/convert_qwen3_moe.py +4 -1
keras_hub/src/utils/transformers/convert_sam3.py +472 -0
keras_hub/src/utils/transformers/export/gemma3.py +196 -0
keras_hub/src/utils/transformers/export/hf_exporter.py +86 -25
keras_hub/src/utils/transformers/export/qwen.py +136 -0
keras_hub/src/utils/transformers/preset_loader.py +15 -1
keras_hub/src/version.py +1 -1
keras_hub/tokenizers/__init__.py +6 -0
{keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/METADATA +6 -13
{keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/RECORD +108 -76
{keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/WHEEL +1 -1
keras_hub/src/models/gemma3/rms_normalization.py +0 -26
{keras_hub-0.25.0.dev0.dist-info → keras_hub-0.26.0.dev0.dist-info}/top_level.txt +0 -0

keras_hub/src/models/segformer/segformer_backbone.py CHANGED Viewed

@@ -40,15 +40,15 @@ class SegFormerBackbone(Backbone):
     import keras_hub
     backbone = keras_hub.models.MiTBackbone(
-        depths=[2, 2, 2, 2],
         image_shape=(224, 224, 3),
-        hidden_dims=[32, 64, 160, 256],
         num_layers=4,
-        blockwise_num_heads=[1, 2, 5, 8],
-        blockwise_sr_ratios=[8, 4, 2, 1],
+        hidden_dims=[32, 64, 160, 256],
+        layerwise_depths=[2, 2, 2, 2],
+        layerwise_num_heads=[1, 2, 5, 8],
+        layerwise_sr_ratios=[8, 4, 2, 1],
+        layerwise_patch_sizes=[7, 3, 3, 3],
+        layerwise_strides=[4, 2, 2, 2],
         max_drop_path_rate=0.1,
-        patch_sizes=[7, 3, 3, 3],
-        strides=[4, 2, 2, 2],
     )
     segformer_backbone = keras_hub.models.SegFormerBackbone(

keras_hub/src/models/siglip/siglip_layers.py CHANGED Viewed

@@ -3,10 +3,8 @@ import math
 from keras import initializers
 from keras import layers
 from keras import ops
+from keras.layers import ReversibleEmbedding
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.utils.keras_utils import clone_initializer
 from keras_hub.src.utils.keras_utils import gelu_approximate
 from keras_hub.src.utils.keras_utils import standardize_data_format

keras_hub/src/models/smollm3/smollm3_backbone.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.smollm3.smollm3_layers import SmolLM3DecoderLayer

keras_hub/src/models/stable_diffusion_3/t5_encoder.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import keras
+from keras.layers import ReversibleEmbedding
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.t5.t5_layer_norm import T5LayerNorm
 from keras_hub.src.models.t5.t5_transformer_layer import T5TransformerLayer

keras_hub/src/models/t5/t5_backbone.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.t5.t5_layer_norm import T5LayerNorm
 from keras_hub.src.models.t5.t5_transformer_layer import T5TransformerLayer

keras_hub/src/models/t5gemma/t5gemma_backbone.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import keras
+from keras.layers import ReversibleEmbedding
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.gemma.rms_normalization import RMSNormalization
 from keras_hub.src.models.t5gemma.t5gemma_decoder import T5GemmaDecoderLayer

keras_hub/src/models/task.py CHANGED Viewed

@@ -361,7 +361,7 @@ class Task(PipelineModel):
             # Output captured summary for non-interactive logging.
             if print_fn:
-                print_fn(console.end_capture(), line_break=False)
+                print_fn(console.end_capture().rstrip("\n"))
         super().summary(
             line_length=line_length,

keras_hub/src/tests/test_case.py CHANGED Viewed

@@ -1,18 +1,19 @@
+import gc
 import json
 import os
 import pathlib
 import re
+import tempfile
 import keras
 import numpy as np
+import packaging.version
 import tensorflow as tf
 from absl.testing import parameterized
 from keras import ops
 from keras import tree
+from keras.layers import ReversibleEmbedding
-from keras_hub.src.layers.modeling.reversible_embedding import (
-    ReversibleEmbedding,
-)
 from keras_hub.src.models.retinanet.feature_pyramid import FeaturePyramid
 from keras_hub.src.tokenizers.tokenizer import Tokenizer
 from keras_hub.src.utils.tensor_utils import is_float_dtype
@@ -433,6 +434,396 @@ class TestCase(tf.test.TestCase, parameterized.TestCase):
         restored_output = restored_model(input_data)
         self.assertAllClose(model_output, restored_output, atol=atol, rtol=rtol)
+    def _verify_litert_outputs(
+        self,
+        keras_output,
+        litert_output,
+        sig_outputs,
+        expected_output_shape=None,
+        verify_numerics=True,
+        comparison_mode="strict",
+        output_thresholds=None,
+    ):
+        """Verify LiteRT outputs against expected shape and Keras outputs.
+        Args:
+            keras_output: Keras model output (can be None if not verifying
+                numerics)
+            litert_output: LiteRT interpreter output
+            sig_outputs: Output names from SignatureDef
+            expected_output_shape: Expected output shape (optional)
+            verify_numerics: Whether to verify numerical correctness
+            comparison_mode: "strict" or "statistical"
+            output_thresholds: Thresholds for statistical comparison
+        """
+        # Handle single output case: if Keras has single output but LiteRT
+        # returns dict
+        if (
+            not isinstance(keras_output, dict)
+            and isinstance(litert_output, dict)
+            and len(litert_output) == 1
+        ):
+            litert_output = list(litert_output.values())[0]
+        # Verify output shape if specified
+        if expected_output_shape is not None:
+            self.assertEqual(litert_output.shape, expected_output_shape)
+        # Verify numerical correctness if requested
+        if verify_numerics:
+            self._verify_litert_numerics(
+                keras_output,
+                litert_output,
+                sig_outputs,
+                output_thresholds,
+                comparison_mode,
+            )
+    def _verify_litert_numerics(
+        self,
+        keras_output,
+        litert_output,
+        sig_outputs,
+        output_thresholds,
+        comparison_mode,
+    ):
+        """Verify numerical accuracy between Keras and LiteRT outputs.
+        This method compares outputs using the SignatureDef output names to
+        match Keras outputs with LiteRT outputs properly.
+        Args:
+            keras_output: Keras model output (tensor or dict)
+            litert_output: LiteRT interpreter output (tensor or dict)
+            sig_outputs: List of output names from SignatureDef
+            output_thresholds: Dict of thresholds for comparison
+            comparison_mode: "strict" or "statistical"
+        """
+        if isinstance(keras_output, dict) and isinstance(litert_output, dict):
+            # Both outputs are dicts - compare using SignatureDef output names
+            for output_name in sig_outputs:
+                if output_name not in keras_output:
+                    self.fail(
+                        f"SignatureDef output '{output_name}' not found in "
+                        f"Keras outputs.\n"
+                        f"Keras keys: {list(keras_output.keys())}"
+                    )
+                if output_name not in litert_output:
+                    self.fail(
+                        f"SignatureDef output '{output_name}' not found in "
+                        f"LiteRT outputs.\n"
+                        f"LiteRT keys: {list(litert_output.keys())}"
+                    )
+                keras_val_np = ops.convert_to_numpy(keras_output[output_name])
+                litert_val = litert_output[output_name]
+                output_threshold = output_thresholds.get(
+                    output_name,
+                    output_thresholds.get("*", {"max": 10.0, "mean": 0.1}),
+                )
+                self._compare_outputs(
+                    keras_val_np,
+                    litert_val,
+                    comparison_mode,
+                    output_name,
+                    output_threshold["max"],
+                    output_threshold["mean"],
+                )
+        elif not isinstance(keras_output, dict) and not isinstance(
+            litert_output, dict
+        ):
+            # Both outputs are single tensors - direct comparison
+            keras_output_np = ops.convert_to_numpy(keras_output)
+            output_threshold = output_thresholds.get(
+                "*", {"max": 1e-2, "mean": 1e-3}
+            )
+            self._compare_outputs(
+                keras_output_np,
+                litert_output,
+                comparison_mode,
+                key=None,
+                max_threshold=output_threshold["max"],
+                mean_threshold=output_threshold["mean"],
+            )
+        else:
+            keras_type = type(keras_output).__name__
+            litert_type = type(litert_output).__name__
+            self.fail(
+                f"Output structure mismatch: Keras returns "
+                f"{keras_type}, LiteRT returns {litert_type}"
+            )
+    def run_litert_export_test(
+        self,
+        cls=None,
+        init_kwargs=None,
+        input_data=None,
+        expected_output_shape=None,
+        model=None,
+        verify_numerics=True,
+        # No LiteRT output in model saving test; remove undefined return
+        output_thresholds=None,
+        **export_kwargs,
+    ):
+        """Export model to LiteRT format and verify outputs.
+        Args:
+            cls: Model class to test (optional if model is provided)
+            init_kwargs: Initialization arguments for the model (optional
+                if model is provided)
+            input_data: Input data to test with (dict or tensor)
+            expected_output_shape: Expected output shape from LiteRT inference
+            model: Pre-created model instance (optional, if provided cls and
+                init_kwargs are ignored)
+            verify_numerics: Whether to verify numerical correctness
+                between Keras and LiteRT outputs. Set to False for preset
+                models with load_weights=False where outputs are random.
+            comparison_mode: "strict" (default) or "statistical".
+                - "strict": All elements must be within default tolerances
+                    (1e-6)
+                - "statistical": Check mean/max absolute differences against
+                    provided thresholds
+            output_thresholds: Dict mapping output names to threshold dicts
+                with "max" and "mean" keys. Use "*" as wildcard for defaults.
+                Example: {"output1": {"max": 1e-4, "mean": 1e-5},
+                         "*": {"max": 1e-3, "mean": 1e-4}}
+            **export_kwargs: Additional keyword arguments to pass to
+                model.export(), such as allow_custom_ops=True or
+                enable_select_tf_ops=True.
+        """
+        # Skip test if Keras version is less than 3.13
+        if packaging.version.Version(
+            keras.__version__
+        ) < packaging.version.Version("3.13.0"):
+            self.skipTest("LiteRT export requires Keras >= 3.13")
+        self.skipTest(
+            "#TODO: [#2572] Re-enable LiteRT tests after a new tf release. "
+            "Can't test with tf 2.20 due to tf.lite module deprecation."
+        )
+        # Extract comparison_mode from export_kwargs if provided
+        comparison_mode = export_kwargs.pop("comparison_mode", "strict")
+        if keras.backend.backend() != "tensorflow":
+            self.skipTest("LiteRT export only supports TensorFlow backend")
+        try:
+            from ai_edge_litert.interpreter import Interpreter
+        except ImportError:
+            Interpreter = tf.lite.Interpreter
+        if output_thresholds is None:
+            output_thresholds = {"*": {"max": 10.0, "mean": 0.1}}
+        if model is None:
+            if cls is None or init_kwargs is None:
+                raise ValueError(
+                    "Either 'model' or 'cls' and 'init_kwargs' must be provided"
+                )
+            model = cls(**init_kwargs)
+            _ = model(input_data)
+        interpreter = None
+        try:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                export_path = os.path.join(temp_dir, "model.tflite")
+                # Step 1: Export model and get Keras output
+                model.export(export_path, format="litert", **export_kwargs)
+                self.assertTrue(os.path.exists(export_path))
+                self.assertGreater(os.path.getsize(export_path), 0)
+                keras_output = model(input_data) if verify_numerics else None
+                # Step 2: Load interpreter and verify SignatureDef
+                interpreter = Interpreter(model_path=export_path)
+                signature_defs = interpreter.get_signature_list()
+                self.assertIn(
+                    "serving_default",
+                    signature_defs,
+                    "Missing serving_default signature",
+                )
+                serving_sig = signature_defs["serving_default"]
+                sig_inputs = serving_sig.get("inputs", [])
+                sig_outputs = serving_sig.get("outputs", [])
+                self.assertGreater(
+                    len(sig_inputs),
+                    0,
+                    "Should have at least one input in SignatureDef",
+                )
+                self.assertGreater(
+                    len(sig_outputs),
+                    0,
+                    "Should have at least one output in SignatureDef",
+                )
+                # Verify input signature
+                if isinstance(input_data, dict):
+                    expected_inputs = set(input_data.keys())
+                    actual_inputs = set(sig_inputs)
+                    # Check that all expected inputs are in the signature
+                    # (allow signature to have additional optional inputs)
+                    missing_inputs = expected_inputs - actual_inputs
+                    if missing_inputs:
+                        self.fail(
+                            f"Missing inputs in SignatureDef: "
+                            f"{sorted(missing_inputs)}. "
+                            f"Expected: {sorted(expected_inputs)}, "
+                            f"SignatureDef has: {sorted(actual_inputs)}"
+                        )
+                else:
+                    # For numpy arrays, just verify we have exactly one input
+                    # (since we're passing a single tensor)
+                    if len(sig_inputs) != 1:
+                        self.fail(
+                            "Expected 1 input for numpy array input_data, "
+                            f"but SignatureDef has {len(sig_inputs)}: "
+                            f"{sig_inputs}"
+                        )
+                # Verify output signature
+                if verify_numerics and isinstance(keras_output, dict):
+                    expected_outputs = set(keras_output.keys())
+                    actual_outputs = set(sig_outputs)
+                    if expected_outputs != actual_outputs:
+                        self.fail(
+                            f"Output name mismatch: Expected "
+                            f"{sorted(expected_outputs)}, "
+                            f"but SignatureDef has {sorted(actual_outputs)}"
+                        )
+                # Step 3: Run LiteRT inference
+                os.remove(export_path)
+                # Simple inference implementation
+                runner = interpreter.get_signature_runner("serving_default")
+                # Convert input data dtypes to match TFLite expectations
+                def convert_for_tflite(x):
+                    """Convert tensor/array to TFLite-compatible dtypes."""
+                    if hasattr(x, "dtype"):
+                        if isinstance(x, np.ndarray):
+                            if x.dtype == bool:
+                                return x.astype(np.int32)
+                            elif x.dtype == np.float64:
+                                return x.astype(np.float32)
+                            elif x.dtype == np.int64:
+                                return x.astype(np.int32)
+                        else:  # TensorFlow tensor
+                            if x.dtype == tf.bool:
+                                return ops.cast(x, "int32").numpy()
+                            elif x.dtype == tf.float64:
+                                return ops.cast(x, "float32").numpy()
+                            elif x.dtype == tf.int64:
+                                return ops.cast(x, "int32").numpy()
+                            else:
+                                return x.numpy() if hasattr(x, "numpy") else x
+                    elif hasattr(x, "numpy"):
+                        return x.numpy()
+                    return x
+                if isinstance(input_data, dict):
+                    converted_input_data = tree.map_structure(
+                        convert_for_tflite, input_data
+                    )
+                    litert_output = runner(**converted_input_data)
+                else:
+                    # For single tensor inputs, get the input name
+                    sig_inputs = serving_sig.get("inputs", [])
+                    input_name = sig_inputs[
+                        0
+                    ]  # We verified len(sig_inputs) == 1 above
+                    converted_input = convert_for_tflite(input_data)
+                    litert_output = runner(**{input_name: converted_input})
+                # Step 4: Verify outputs
+                self._verify_litert_outputs(
+                    keras_output,
+                    litert_output,
+                    sig_outputs,
+                    expected_output_shape=expected_output_shape,
+                    verify_numerics=verify_numerics,
+                    comparison_mode=comparison_mode,
+                    output_thresholds=output_thresholds,
+                )
+        finally:
+            if interpreter is not None:
+                del interpreter
+            if model is not None and cls is not None:
+                del model
+            gc.collect()
+    def _compare_outputs(
+        self,
+        keras_val,
+        litert_val,
+        comparison_mode,
+        key=None,
+        max_threshold=10.0,
+        mean_threshold=0.1,
+    ):
+        """Compare Keras and LiteRT outputs using specified comparison mode.
+        Args:
+            keras_val: Keras model output (numpy array)
+            litert_val: LiteRT model output (numpy array)
+            comparison_mode: "strict" or "statistical"
+            key: Output key name for error messages (optional)
+            max_threshold: Maximum absolute difference threshold for statistical
+                mode
+            mean_threshold: Mean absolute difference threshold for statistical
+                mode
+        """
+        key_msg = f" for output key '{key}'" if key else ""
+        # Check if shapes are compatible for comparison
+        self.assertEqual(
+            keras_val.shape,
+            litert_val.shape,
+            f"Shape mismatch{key_msg}: Keras shape "
+            f"{keras_val.shape}, LiteRT shape {litert_val.shape}. "
+            "Numerical comparison cannot proceed due to incompatible shapes.",
+        )
+        if comparison_mode == "strict":
+            # Original strict element-wise comparison with default tolerances
+            self.assertAllClose(
+                keras_val,
+                litert_val,
+                atol=1e-6,
+                rtol=1e-6,
+                msg=f"Mismatch{key_msg}",
+            )
+        elif comparison_mode == "statistical":
+            # Statistical comparison
+            # Calculate element-wise absolute differences
+            abs_diff = np.abs(keras_val - litert_val)
+            # Element-wise statistics
+            mean_abs_diff = np.mean(abs_diff)
+            max_abs_diff = np.max(abs_diff)
+            # Assert reasonable bounds on statistical differences
+            self.assertLessEqual(
+                mean_abs_diff,
+                mean_threshold,
+                f"Mean absolute difference too high: {mean_abs_diff:.6e}"
+                f"{key_msg} (threshold: {mean_threshold})",
+            )
+            self.assertLessEqual(
+                max_abs_diff,
+                max_threshold,
+                f"Max absolute difference too high: {max_abs_diff:.6e}"
+                f"{key_msg} (threshold: {max_threshold})",
+            )
+        else:
+            raise ValueError(
+                f"Unknown comparison_mode: {comparison_mode}. Must be "
+                "'strict' or 'statistical'"
+            )
     def run_backbone_test(
         self,
         cls,

keras_hub/src/tokenizers/byte_pair_tokenizer.py CHANGED Viewed

@@ -11,6 +11,7 @@ from typing import Iterable
 import keras
 import regex as re
+from keras.src.saving import serialization_lib
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.tokenizers import tokenizer
@@ -21,9 +22,11 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function
 try:
     import tensorflow as tf
-    import tensorflow_text as tf_text
 except ImportError:
     tf = None
+try:
+    import tensorflow_text as tf_text
+except ImportError:
     tf_text = None
 VOCAB_FILENAME = "vocabulary.json"
@@ -135,7 +138,13 @@ def split_strings_for_bpe(inputs, unsplittable_tokens=None):
     return remove_strings_from_inputs(raw_tokens, "६")
-class BytePairTokenizerCache(tf.Module if tf is not None else object):
+try:
+    _base_class = tf.Module
+except (AttributeError, TypeError):
+    _base_class = object
+class BytePairTokenizerCache(_base_class):
     """Cache that stores the encoded result of seen tokens.
     The cache key is string tensor or python strings, and the value is split
@@ -331,6 +340,17 @@ class BytePairTokenizer(tokenizer.Tokenizer):
             return
         if isinstance(vocabulary, str):
+            if serialization_lib.in_safe_mode():
+                raise ValueError(
+                    "Requested the loading of a vocabulary file outside of the "
+                    "model archive. This carries a potential risk of loading "
+                    "arbitrary and sensitive files and thus it is disallowed "
+                    "by default. If you trust the source of the artifact, you "
+                    "can override this error by passing `safe_mode=False` to "
+                    "the loading function, or calling "
+                    "`keras.config.enable_unsafe_deserialization()`. "
+                    f"Vocabulary file: '{vocabulary}'"
+                )
             with open(vocabulary, "r", encoding="utf-8") as f:
                 self.vocabulary = json.load(f)
         elif isinstance(vocabulary, dict):
@@ -342,6 +362,17 @@ class BytePairTokenizer(tokenizer.Tokenizer):
                 f"`type(vocabulary)={type(vocabulary)}`."
             )
         if isinstance(merges, str):
+            if serialization_lib.in_safe_mode():
+                raise ValueError(
+                    "Requested the loading of a merges file outside of the "
+                    "model archive. This carries a potential risk of loading "
+                    "arbitrary and sensitive files and thus it is disallowed "
+                    "by default. If you trust the source of the artifact, you "
+                    "can override this error by passing `safe_mode=False` to "
+                    "the loading function, or calling "
+                    "`keras.config.enable_unsafe_deserialization()`. "
+                    f"Merges file: '{merges}'"
+                )
             with open(merges, encoding="utf-8") as f:
                 self.merges = [bp.rstrip() for bp in f]
         elif isinstance(merges, Iterable):

keras_hub/src/tokenizers/byte_tokenizer.py CHANGED Viewed

@@ -8,9 +8,11 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function
 try:
     import tensorflow as tf
-    import tensorflow_text as tf_text
 except ImportError:
     tf = None
+try:
+    import tensorflow_text as tf_text
+except ImportError:
     tf_text = None

keras_hub/src/tokenizers/sentence_piece_tokenizer.py CHANGED Viewed

@@ -3,6 +3,7 @@ import binascii
 import os
 import keras
+from keras.src.saving import serialization_lib
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.tokenizers import tokenizer
@@ -14,9 +15,11 @@ from keras_hub.src.utils.tensor_utils import tensor_to_list
 try:
     import tensorflow as tf
-    import tensorflow_text as tf_text
 except ImportError:
     tf = None
+try:
+    import tensorflow_text as tf_text
+except ImportError:
     tf_text = None
 VOCAB_FILENAME = "vocabulary.spm"
@@ -145,6 +148,17 @@ class SentencePieceTokenizer(tokenizer.Tokenizer):
                 except binascii.Error:
                     pass
             if not is_base64:
+                if serialization_lib.in_safe_mode():
+                    raise ValueError(
+                        "Requested the loading of a proto file outside of "
+                        "the model archive. This carries a potential risk of "
+                        "loading arbitrary and sensitive files and thus it is "
+                        "disallowed by default. If you trust the source of the "
+                        "artifact, you can override this error by passing "
+                        "`safe_mode=False` to the loading function, or calling "
+                        "`keras.config.enable_unsafe_deserialization()`. "
+                        f"Proto file: '{proto}'"
+                    )
                 proto_bytes = open(proto, "rb").read()
         elif isinstance(proto, bytes):
             proto_bytes = proto

keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py CHANGED Viewed

@@ -6,9 +6,11 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function
 try:
     import tensorflow as tf
-    import tensorflow_text as tf_text
 except ImportError:
     tf = None
+try:
+    import tensorflow_text as tf_text
+except ImportError:
     tf_text = None

keras-hub 0.25.0.dev0__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl

keras-hub 0.25.0.dev0__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl