keras-rs-nightly 0.2.2.dev202508190331__py3-none-any.whl → 0.4.1.dev202601250348__py3-none-any.whl
- keras_rs/losses/__init__.py +1 -0
- keras_rs/src/layers/embedding/base_distributed_embedding.py +19 -10
- keras_rs/src/layers/embedding/distributed_embedding_config.py +2 -2
- keras_rs/src/layers/embedding/jax/distributed_embedding.py +133 -201
- keras_rs/src/layers/embedding/jax/embedding_lookup.py +25 -4
- keras_rs/src/layers/embedding/jax/embedding_utils.py +22 -401
- keras_rs/src/layers/embedding/tensorflow/config_conversion.py +26 -19
- keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py +22 -5
- keras_rs/src/losses/list_mle_loss.py +212 -0
- keras_rs/src/metrics/ranking_metrics_utils.py +21 -2
- keras_rs/src/utils/tpu_test_utils.py +120 -0
- keras_rs/src/version.py +1 -1
- {keras_rs_nightly-0.2.2.dev202508190331.dist-info → keras_rs_nightly-0.4.1.dev202601250348.dist-info}/METADATA +4 -3
- {keras_rs_nightly-0.2.2.dev202508190331.dist-info → keras_rs_nightly-0.4.1.dev202601250348.dist-info}/RECORD +16 -14
- {keras_rs_nightly-0.2.2.dev202508190331.dist-info → keras_rs_nightly-0.4.1.dev202601250348.dist-info}/WHEEL +1 -1
- {keras_rs_nightly-0.2.2.dev202508190331.dist-info → keras_rs_nightly-0.4.1.dev202601250348.dist-info}/top_level.txt +0 -0
```diff
--- a/keras_rs/src/layers/embedding/jax/embedding_utils.py
+++ b/keras_rs/src/layers/embedding/jax/embedding_utils.py
@@ -1,17 +1,13 @@
 """Utility functions for manipulating JAX embedding tables and inputs."""

 import collections
-import dataclasses
-import typing
 from typing import Any, Mapping, NamedTuple, Sequence, TypeAlias, TypeVar

 import jax
 import numpy as np
-from jax import numpy as jnp
 from jax_tpu_embedding.sparsecore.lib.nn import embedding
+from jax_tpu_embedding.sparsecore.lib.nn import table_stacking
 from jax_tpu_embedding.sparsecore.lib.nn.embedding_spec import FeatureSpec
-from jax_tpu_embedding.sparsecore.lib.nn.embedding_spec import StackedTableSpec
-from jax_tpu_embedding.sparsecore.lib.nn.embedding_spec import TableSpec

 from keras_rs.src.types import Nested

@@ -24,7 +20,7 @@ Shape: TypeAlias = tuple[int, ...]

 class FeatureSamples(NamedTuple):
     tokens: ArrayLike
-    weights: ArrayLike
+    weights: ArrayLike | None


 class ShardedCooMatrix(NamedTuple):
```
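With `weights` now typed `ArrayLike | None`, a feature's samples can simply omit weights instead of carrying a materialized array of ones. A minimal sketch of the two cases (the id and weight values are invented; the NamedTuple is redefined locally only to keep the snippet self-contained):

```python
from typing import Any, NamedTuple

import numpy as np


class FeatureSamples(NamedTuple):  # mirrors the updated definition above
    tokens: Any
    weights: Any | None


ids = np.array([[1, 2], [3, 4]], dtype=np.int32)

unweighted = FeatureSamples(tokens=ids, weights=None)  # no per-id weights
weighted = FeatureSamples(
    tokens=ids,
    weights=np.array([[1.0, 0.5], [1.0, 1.0]], dtype=np.float32),
)
```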
```diff
@@ -35,357 +31,6 @@ class ShardedCooMatrix(NamedTuple):
     values: ArrayLike


-def _round_up_to_multiple(value: int, multiple: int) -> int:
-    return ((value + multiple - 1) // multiple) * multiple
-
-
-def _default_stacked_table_spec(
-    table_spec: TableSpec, num_shards: int, batch_size: int
-) -> StackedTableSpec:
-    return StackedTableSpec(
-        stack_name=table_spec.name,
-        stack_vocab_size=_round_up_to_multiple(
-            table_spec.vocabulary_size, 8 * num_shards
-        ),
-        stack_embedding_dim=_round_up_to_multiple(table_spec.embedding_dim, 8),
-        optimizer=table_spec.optimizer,
-        combiner=table_spec.combiner,
-        total_sample_count=batch_size,
-        max_ids_per_partition=table_spec.max_ids_per_partition,
-        max_unique_ids_per_partition=table_spec.max_unique_ids_per_partition,
-    )
-
-
-def _get_stacked_table_spec(
-    table_spec: TableSpec, num_shards: int, batch_size: int = 0
-) -> StackedTableSpec:
-    return table_spec.stacked_table_spec or _default_stacked_table_spec(
-        table_spec, num_shards, batch_size
-    )
-
-
-def pad_table(
-    table_spec: TableSpec,
-    table_values: jax.Array,
-    num_shards: int,
-    pad_value: jnp.float32 = jnp.nan,
-) -> jax.Array:
-    """Adds appropriate padding to a table to prepare for stacking.
-
-    Args:
-        table_spec: Table specification describing the table to pad.
-        table_values: Table values array to pad.
-        num_shards: Number of shards in the table (typically
-            `global_device_count * num_sc_per_device`).
-        pad_value: Value to use for padding.
-
-    Returns:
-        Padded table values.
-    """
-    vocabulary_size = table_spec.vocabulary_size
-    embedding_dim = table_spec.embedding_dim
-    padded_vocabulary_size = _round_up_to_multiple(
-        vocabulary_size, 8 * num_shards
-    )
-    stack_embedding_dim = _get_stacked_table_spec(
-        table_spec, num_shards
-    ).stack_embedding_dim
-    return jnp.pad(
-        table_values,
-        (
-            (0, padded_vocabulary_size - vocabulary_size),
-            (0, stack_embedding_dim - embedding_dim),
-        ),
-        constant_values=pad_value,
-    )
-
-
-def _stack_and_shard_table(
-    stacked_table: jax.Array,
-    table_spec: TableSpec,
-    table: jax.Array,
-    num_shards: int,
-    pad_value: jnp.float32,
-) -> jax.Array:
-    """Stacks and shards a single table for use in sparsecore lookups."""
-    padded_values = pad_table(table_spec, table, num_shards, pad_value)
-    sharded_padded_vocabulary_size = padded_values.shape[0] // num_shards
-    stack_embedding_dim = stacked_table.shape[-1]
-
-    # Mod-shard vocabulary across devices.
-    sharded_values = jnp.swapaxes(
-        padded_values.reshape(-1, num_shards, stack_embedding_dim),
-        0,
-        1,
-    )
-
-    # Rotate shards.
-    setting_in_stack = table_spec.setting_in_stack
-    rotated_values = jnp.roll(
-        sharded_values, setting_in_stack.shard_rotation, axis=0
-    )
-
-    # Insert table into the stack.
-    table_row = setting_in_stack.row_offset_in_shard
-    stacked_table = stacked_table.at[
-        :, table_row : (table_row + sharded_padded_vocabulary_size), :
-    ].set(rotated_values)
-
-    return stacked_table
-
-
-def stack_and_shard_tables(
-    table_specs: Nested[TableSpec],
-    tables: Nested[ArrayLike],
-    num_shards: int,
-    pad_value: jnp.float32 = jnp.nan,
-) -> dict[str, Nested[jax.Array]]:
-    """Stacks and shards tables for use in sparsecore lookups.
-
-    Args:
-        table_specs: Nested collection of unstacked table specifications.
-        tables: Table values corresponding to the table_specs.
-        num_shards: Number of shards in the table (typically
-            `global_device_count * num_sc_per_device`).
-        pad_value: Value to use for padding.
-
-    Returns:
-        A mapping of stacked table names to stacked table values.
-    """
-
-    # Gather stacked table information.
-    stacked_table_map: dict[
-        str,
-        tuple[StackedTableSpec, list[TableSpec]],
-    ] = {}
-
-    def collect_stacked_tables(table_spec: TableSpec) -> None:
-        stacked_table_spec = _get_stacked_table_spec(table_spec, num_shards)
-        stacked_table_name = stacked_table_spec.stack_name
-        if stacked_table_name not in stacked_table_map:
-            stacked_table_map[stacked_table_name] = (stacked_table_spec, [])
-        stacked_table_map[stacked_table_name][1].append(table_spec)
-
-    _ = jax.tree.map(collect_stacked_tables, table_specs)
-
-    table_map: dict[str, Nested[jax.Array]] = {}
-
-    def collect_tables(table_spec: TableSpec, table: Nested[jax.Array]) -> None:
-        table_map[table_spec.name] = table
-
-    _ = jax.tree.map(collect_tables, table_specs, tables)
-
-    stacked_tables: dict[str, Nested[jax.Array]] = {}
-    for (
-        stacked_table_spec,
-        table_specs,
-    ) in stacked_table_map.values():
-        stack_vocab_size = stacked_table_spec.stack_vocab_size
-        sharded_vocab_size = stack_vocab_size // num_shards
-        stack_embedding_dim = stacked_table_spec.stack_embedding_dim
-
-        # Allocate initial buffer. The stacked table will be divided among
-        # shards by splitting the vocabulary dimension:
-        # [ v, e ] -> [s, v/s, e]
-        stacked_table_tree = jax.tree.map(
-            lambda _: jnp.zeros(
-                # pylint: disable-next=cell-var-from-loop, used only in loop body.
-                shape=(num_shards, sharded_vocab_size, stack_embedding_dim),
-                dtype=jnp.float32,
-            ),
-            table_map[table_specs[0].name],
-        )
-
-        for table_spec in table_specs:
-            table_tree = table_map[table_spec.name]
-            stacked_table_tree = jax.tree.map(
-                lambda stacked_table, table: _stack_and_shard_table(
-                    # pylint: disable-next=cell-var-from-loop, used only in loop body.
-                    stacked_table,
-                    # pylint: disable-next=cell-var-from-loop, used only in loop body.
-                    table_spec,
-                    table,
-                    num_shards,
-                    pad_value,
-                ),
-                stacked_table_tree,
-                table_tree,
-            )
-
-        stacked_tables[stacked_table_spec.stack_name] = stacked_table_tree
-
-    return stacked_tables
-
-
-def _unshard_and_unstack_table(
-    table_spec: TableSpec,
-    stacked_table_tree: Nested[jax.Array],
-    num_shards: int,
-) -> Nested[jax.Array]:
-    """Unshards and unstacks a single table."""
-    vocabulary_size = table_spec.vocabulary_size
-    embedding_dim = table_spec.embedding_dim
-
-    def _unshard_and_unstack_single_table(
-        table_spec: TableSpec, stacked_table: jax.Array
-    ) -> jax.Array:
-        stack_embedding_dim = stacked_table.shape[-1]
-
-        # Maybe re-shape in case it was flattened.
-        stacked_table = stacked_table.reshape(
-            num_shards, -1, stack_embedding_dim
-        )
-        sharded_vocabulary_size = (
-            _round_up_to_multiple(vocabulary_size, 8 * num_shards) // num_shards
-        )
-
-        # Extract padded values from the stacked table.
-        setting_in_stack = table_spec.setting_in_stack
-        row = setting_in_stack.row_offset_in_shard
-        padded_values = stacked_table[
-            :, row : (row + sharded_vocabulary_size), :
-        ]
-
-        # Un-rotate shards.
-        padded_values = jnp.roll(
-            padded_values, -setting_in_stack.shard_rotation, axis=0
-        )
-
-        # Un-mod-shard.
-        padded_values = jnp.swapaxes(padded_values, 0, 1).reshape(
-            -1, stack_embedding_dim
-        )
-
-        # Un-pad.
-        return padded_values[:vocabulary_size, :embedding_dim]
-
-    output: Nested[jax.Array] = jax.tree.map(
-        lambda stacked_table: _unshard_and_unstack_single_table(
-            table_spec, stacked_table
-        ),
-        stacked_table_tree,
-    )
-    return output
-
-
-def unshard_and_unstack_tables(
-    table_specs: Nested[TableSpec],
-    stacked_tables: Mapping[str, Nested[jax.Array]],
-    num_shards: int,
-) -> Nested[jax.Array]:
-    """Unshards and unstacks a collection of tables.
-
-    Args:
-        table_specs: Nested collection of unstacked table specifications.
-        stacked_tables: Mapping of stacked table names to stacked table values.
-        num_shards: Number of shards in the table (typically
-            `global_device_count * num_sc_per_device`).
-
-    Returns:
-        A mapping of table names to unstacked table values.
-    """
-    output: Nested[jax.Array] = jax.tree.map(
-        lambda table_spec: _unshard_and_unstack_table(
-            table_spec,
-            stacked_tables[
-                _get_stacked_table_spec(table_spec, num_shards=1).stack_name
-            ],
-            num_shards,
-        ),
-        table_specs,
-    )
-    return output
-
-
-def get_table_specs(feature_specs: Nested[FeatureSpec]) -> dict[str, TableSpec]:
-    table_spec_map: dict[str, TableSpec] = {}
-    flat_feature_specs, _ = jax.tree.flatten(feature_specs)
-    for feature_spec in flat_feature_specs:
-        table_spec = feature_spec.table_spec
-        table_spec_map[table_spec.name] = table_spec
-    return table_spec_map
-
-
-def get_table_stacks(
-    table_specs: Nested[TableSpec],
-) -> dict[str, list[TableSpec]]:
-    """Extracts lists of tables that are stacked together.
-
-    Args:
-        table_specs: Nested collection of table specifications.
-
-    Returns:
-        A mapping of stacked table names to lists of table specifications for
-        each stack.
-    """
-    stacked_table_specs: dict[str, list[TableSpec]] = collections.defaultdict(
-        list
-    )
-    flat_table_specs, _ = jax.tree.flatten(table_specs)
-    for table_spec in flat_table_specs:
-        table_spec = typing.cast(TableSpec, table_spec)
-        stacked_table_spec = table_spec.stacked_table_spec
-        if stacked_table_spec is not None:
-            stacked_table_specs[stacked_table_spec.stack_name].append(
-                table_spec
-            )
-        else:
-            stacked_table_specs[table_spec.name].append(table_spec)
-
-    return stacked_table_specs
-
-
-def update_stacked_table_specs(
-    feature_specs: Nested[FeatureSpec],
-    max_ids_per_partition: Mapping[str, int],
-    max_unique_ids_per_partition: Mapping[str, int],
-) -> None:
-    """Updates properties in the supplied feature specs.
-
-    Args:
-        feature_specs: Feature specs to update in-place.
-        max_ids_per_partition: Mapping of table stack name to
-            new `max_ids_per_partition` for the stack.
-        max_unique_ids_per_partition: Mapping of table stack name to
-            new `max_unique_ids_per_partition` for the stack.
-    """
-    # Collect table specs and stacked table specs.
-    table_specs: dict[str, TableSpec] = {}
-    for feature_spec in jax.tree.flatten(feature_specs)[0]:
-        feature_spec = typing.cast(FeatureSpec, feature_spec)
-        table_specs[feature_spec.table_spec.name] = feature_spec.table_spec
-
-    stacked_table_specs: dict[str, StackedTableSpec] = {}
-    for table_spec in table_specs.values():
-        stacked_table_spec = typing.cast(
-            StackedTableSpec, table_spec.stacked_table_spec
-        )
-        stacked_table_specs[stacked_table_spec.stack_name] = stacked_table_spec
-
-    # Replace fields in the stacked_table_specs.
-    stacked_table_specs = {
-        stack_name: dataclasses.replace(
-            stacked_table_spec,
-            max_ids_per_partition=max_ids_per_partition[
-                stacked_table_spec.stack_name
-            ],
-            max_unique_ids_per_partition=max_unique_ids_per_partition[
-                stacked_table_spec.stack_name
-            ],
-        )
-        for stack_name, stacked_table_spec in stacked_table_specs.items()
-    }
-
-    # Insert new stacked tables into tables.
-    for table_spec in table_specs.values():
-        stacked_table_spec = typing.cast(
-            StackedTableSpec, table_spec.stacked_table_spec
-        )
-        table_spec.stacked_table_spec = stacked_table_specs[
-            stacked_table_spec.stack_name
-        ]
-
-
 def convert_to_numpy(
     ragged_or_dense: np.ndarray[Any, Any] | Sequence[Sequence[Any]] | Any,
     dtype: Any,
```
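For reference, the deleted helpers padded every table so its vocabulary size became a multiple of `8 * num_shards` and its embedding dimension a multiple of 8 before stacking. A worked example of the rounding arithmetic used by the removed `_round_up_to_multiple` (the sizes below are invented):

```python
def round_up_to_multiple(value: int, multiple: int) -> int:
    # Same arithmetic as the removed `_round_up_to_multiple` helper.
    return ((value + multiple - 1) // multiple) * multiple


num_shards = 16  # e.g. 4 devices * 4 sparsecores per device
vocabulary_size, embedding_dim = 1000, 12

padded_vocab = round_up_to_multiple(vocabulary_size, 8 * num_shards)  # 1024
padded_dim = round_up_to_multiple(embedding_dim, 8)                   # 16
print(padded_vocab, padded_dim)
```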
```diff
@@ -439,36 +84,6 @@ def convert_to_numpy(
     )


-def ones_like(
-    ragged_or_dense: np.ndarray[Any, Any], dtype: Any = None
-) -> np.ndarray[Any, Any]:
-    """Creates an array of ones the same as as the input.
-
-    This differs from traditional numpy in that a ragged input will lead to
-    a resulting ragged array of ones, whereas np.ones_like(...) will instead
-    only consider the outer array and return a 1D dense array of ones.
-
-    Args:
-        ragged_or_dense: The ragged or dense input whose shape and data-type
-            define these same attributes of the returned array.
-        dtype: The data-type of the returned array.
-
-    Returns:
-        An array of ones with the same shape as the input, and specified data
-        type.
-    """
-    dtype = dtype or ragged_or_dense.dtype
-    if ragged_or_dense.dtype == np.ndarray:
-        # Ragged.
-        return np.array(
-            [np.ones_like(row, dtype=dtype) for row in ragged_or_dense],
-            dtype=np.ndarray,
-        )
-    else:
-        # Dense.
-        return np.ones_like(ragged_or_dense, dtype=dtype)
-
-
 def create_feature_samples(
     feature_structure: Nested[T],
     feature_ids: Nested[ArrayLike | Sequence[int] | Sequence[Sequence[int]]],
@@ -496,18 +111,17 @@ def create_feature_samples(
     )

     if feature_weights is None:
-        #
-
-            lambda _, ids: ones_like(ids, np.float32),
+        return jax.tree.map(  # type: ignore[no-any-return]
+            lambda _, ids: FeatureSamples(ids, None),
             feature_structure,
             feature_ids,
         )
-
-
-
-
-
-
+
+    feature_weights = jax.tree.map(
+        lambda _, wgts: convert_to_numpy(wgts, np.float32),
+        feature_structure,
+        feature_weights,
+    )

     # Assemble.
     def _create_feature_samples(
```
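In the new `create_feature_samples` path, missing weights stay `None` (each leaf becomes `FeatureSamples(ids, None)`), and supplied weights are converted to `float32` NumPy arrays via `convert_to_numpy`. A small sketch of the unweighted branch using `jax.tree.map` over an invented feature structure (plain tuples stand in for `FeatureSamples`):

```python
import jax
import numpy as np

# Hypothetical structure: the string leaves stand in for per-feature configs.
feature_structure = {"user_id": "user_feature", "item_id": "item_feature"}
feature_ids = {
    "user_id": np.array([[3], [7]], dtype=np.int32),
    "item_id": np.array([[10, 11], [12, 13]], dtype=np.int32),
}

# Equivalent of the new unweighted branch: pair each leaf of ids with None
# weights instead of materializing a ragged array of ones.
samples = jax.tree.map(
    lambda _, ids: (ids, None),  # stand-in for FeatureSamples(ids, None)
    feature_structure,
    feature_ids,
)
print(samples["user_id"][1])  # None: no weights were synthesized
```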
```diff
@@ -544,8 +158,8 @@ def stack_and_shard_samples(
         global_device_count: Number of global JAX devices.
         num_sc_per_device: Number of sparsecores per device.
         static_buffer_size: The static buffer size to use for the samples.
-
-
+            Defaults to None, in which case an upper-bound for the buffer size
+            will be automatically determined.

     Returns:
         The preprocessed inputs, and statistics useful for updating FeatureSpecs
@@ -555,17 +169,21 @@ def stack_and_shard_samples(
     flat_feature_specs, _ = jax.tree.flatten(feature_specs)

     feature_tokens = []
-
+    collected_weights = []

     def collect_tokens_and_weights(
         feature_spec: FeatureSpec, samples: FeatureSamples
     ) -> None:
         del feature_spec
         feature_tokens.append(samples.tokens)
-
+        collected_weights.append(samples.weights)

     jax.tree.map(collect_tokens_and_weights, feature_specs, feature_samples)

+    feature_weights = (
+        None if all(w is None for w in collected_weights) else collected_weights
+    )
+
     preprocessed_inputs, stats = embedding.preprocess_sparse_dense_matmul_input(
         feature_tokens,
         feature_weights,
```
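`stack_and_shard_samples` now gathers the per-feature weights and forwards `None` to `embedding.preprocess_sparse_dense_matmul_input` unless at least one feature actually carries weights. The selection expression in isolation, with made-up inputs:

```python
import numpy as np


def collapse_weights(collected_weights):
    # Same expression as in the diff: pass None unless some feature has weights.
    return (
        None
        if all(w is None for w in collected_weights)
        else collected_weights
    )


print(collapse_weights([None, None]))                            # None
print(collapse_weights([None, np.array([0.5], dtype=np.float32)]))  # list kept
```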
```diff
@@ -583,7 +201,10 @@ def stack_and_shard_samples(
     for table_name in tables_names:
         shard_ends = preprocessed_inputs.lhs_row_pointers[table_name]
         shard_starts = np.concatenate(
-            [
+            [
+                np.asarray([0]),
+                table_stacking._next_largest_multiple(shard_ends[:-1], 8),
+            ]
         )
         out[table_name] = ShardedCooMatrix(
             shard_starts=shard_starts,
```
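Shard start offsets are now built by prepending 0 to the previous shards' end pointers rounded up to the next multiple of 8. `table_stacking._next_largest_multiple` is assumed here to perform exactly that rounding; a NumPy sketch with invented row pointers:

```python
import numpy as np


def next_largest_multiple(values, multiple):
    # Assumed behaviour of table_stacking._next_largest_multiple: round each
    # value up to the next multiple of `multiple`.
    return ((values + multiple - 1) // multiple) * multiple


# Hypothetical end-of-shard row pointers for a 4-shard COO matrix.
shard_ends = np.array([5, 12, 20, 27])
shard_starts = np.concatenate(
    [np.asarray([0]), next_largest_multiple(shard_ends[:-1], 8)]
)
print(shard_starts)  # [ 0  8 16 24]
```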
```diff
--- a/keras_rs/src/layers/embedding/tensorflow/config_conversion.py
+++ b/keras_rs/src/layers/embedding/tensorflow/config_conversion.py
@@ -53,7 +53,7 @@ OPTIMIZER_MAPPINGS = {
 # KerasRS to TensorFlow


-def translate_keras_rs_configuration(
+def keras_to_tf_tpu_configuration(
     feature_configs: types.Nested[FeatureConfig],
     table_stacking: str | Sequence[str] | Sequence[Sequence[str]],
     num_replicas_in_sync: int,
@@ -66,14 +66,15 @@ def translate_keras_rs_configuration(
     Args:
         feature_configs: The nested Keras RS feature configs.
         table_stacking: The Keras RS table stacking.
+        num_replicas_in_sync: The number of replicas in sync from the strategy.

     Returns:
         A tuple containing the TensorFlow TPU feature configs and the TensorFlow
         TPU sparse core embedding config.
     """
-    tables: dict[
+    tables: dict[int, tf.tpu.experimental.embedding.TableConfig] = {}
     feature_configs = keras.tree.map_structure(
-        lambda f:
+        lambda f: keras_to_tf_tpu_feature_config(
             f, tables, num_replicas_in_sync
         ),
         feature_configs,
@@ -108,9 +109,9 @@ def translate_keras_rs_configuration(
     return feature_configs, sparse_core_embedding_config


-def translate_keras_rs_feature_config(
+def keras_to_tf_tpu_feature_config(
     feature_config: FeatureConfig,
-    tables: dict[
+    tables: dict[int, tf.tpu.experimental.embedding.TableConfig],
     num_replicas_in_sync: int,
 ) -> tf.tpu.experimental.embedding.FeatureConfig:
     """Translates a Keras RS feature config to a TensorFlow TPU feature config.
@@ -120,7 +121,8 @@ def translate_keras_rs_feature_config(

     Args:
         feature_config: The Keras RS feature config to translate.
-        tables: A mapping of KerasRS table
+        tables: A mapping of KerasRS table config ids to TF TPU table configs.
+        num_replicas_in_sync: The number of replicas in sync from the strategy.

     Returns:
         The TensorFlow TPU feature config.
@@ -131,10 +133,10 @@ def translate_keras_rs_feature_config(
             f"but got {num_replicas_in_sync}."
         )

-    table = tables.get(feature_config.table, None)
+    table = tables.get(id(feature_config.table), None)
     if table is None:
-        table =
-        tables[feature_config.table] = table
+        table = keras_to_tf_tpu_table_config(feature_config.table)
+        tables[id(feature_config.table)] = table

     if len(feature_config.output_shape) < 2:
         raise ValueError(
```
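Translated tables are now cached by `id(feature_config.table)` instead of by the `TableConfig` object itself, so feature configs that share one table object reuse a single TF TPU table config without requiring the Keras config to be hashable. A stand-alone sketch of the pattern (the `TableConfig` stub and `translate` function below are stand-ins, not the package's classes):

```python
class TableConfig:  # stand-in for the KerasRS TableConfig
    def __init__(self, name: str):
        self.name = name


def translate(table: TableConfig) -> str:
    # Stand-in for keras_to_tf_tpu_table_config.
    return f"tf-table({table.name})"


shared_table = TableConfig("users")
feature_tables = [shared_table, shared_table, TableConfig("items")]

tables: dict[int, str] = {}
for t in feature_tables:
    translated = tables.get(id(t), None)
    if translated is None:
        translated = translate(t)
        tables[id(t)] = translated

print(len(tables))  # 2: the shared table is translated only once
```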
```diff
@@ -168,7 +170,7 @@ def translate_keras_rs_feature_config(
     )


-def translate_keras_rs_table_config(
+def keras_to_tf_tpu_table_config(
     table_config: TableConfig,
 ) -> tf.tpu.experimental.embedding.TableConfig:
     initializer = table_config.initializer
@@ -179,13 +181,13 @@ def translate_keras_rs_table_config(
         vocabulary_size=table_config.vocabulary_size,
         dim=table_config.embedding_dim,
         initializer=initializer,
-        optimizer=
+        optimizer=to_tf_tpu_optimizer(table_config.optimizer),
         combiner=table_config.combiner,
         name=table_config.name,
     )


-def translate_keras_optimizer(
+def keras_to_tf_tpu_optimizer(
     optimizer: keras.optimizers.Optimizer,
 ) -> TfTpuOptimizer:
     """Translates a Keras optimizer to a TensorFlow TPU `_Optimizer`.
@@ -238,7 +240,12 @@ def translate_keras_optimizer(
             "Unsupported optimizer option `Optimizer.loss_scale_factor`."
         )

-    optimizer_mapping =
+    optimizer_mapping = None
+    for optimizer_class, mapping in OPTIMIZER_MAPPINGS.items():
+        # Handle subclasses of the main optimizer class.
+        if isinstance(optimizer, optimizer_class):
+            optimizer_mapping = mapping
+            break
     if optimizer_mapping is None:
         raise ValueError(
             f"Unsupported optimizer type {type(optimizer)}. Optimizer must be "
```
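The optimizer mapping is now resolved by scanning `OPTIMIZER_MAPPINGS` with `isinstance`, which, per the added comment, also matches subclasses of the supported Keras optimizer classes. A minimal sketch contrasting this with a plain exact-type lookup (the mapping values and the subclass are invented):

```python
import keras

# Placeholder mapping; the real OPTIMIZER_MAPPINGS maps Keras optimizer
# classes to TF TPU optimizer translation info.
OPTIMIZER_MAPPINGS = {
    keras.optimizers.Adam: "adam-mapping",
    keras.optimizers.SGD: "sgd-mapping",
}


class MyAdam(keras.optimizers.Adam):  # hypothetical user subclass
    pass


optimizer = MyAdam()

# An exact-type dict lookup misses the subclass:
assert OPTIMIZER_MAPPINGS.get(type(optimizer)) is None

# The isinstance-based scan, as in the new code, finds it:
optimizer_mapping = None
for optimizer_class, mapping in OPTIMIZER_MAPPINGS.items():
    if isinstance(optimizer, optimizer_class):
        optimizer_mapping = mapping
        break
assert optimizer_mapping == "adam-mapping"
```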
```diff
@@ -258,7 +265,7 @@ def translate_keras_optimizer(
     return optimizer_mapping.tpu_optimizer_class(**tpu_optimizer_kwargs)


-def translate_optimizer(
+def to_tf_tpu_optimizer(
     optimizer: str | keras.optimizers.Optimizer | TfTpuOptimizer | None,
 ) -> TfTpuOptimizer:
     """Translates a Keras optimizer into a TensorFlow TPU `_Optimizer`.
@@ -299,7 +306,7 @@ def translate_optimizer(
             "'sgd', 'adagrad', 'adam', or 'ftrl'"
         )
     elif isinstance(optimizer, keras.optimizers.Optimizer):
-        return
+        return keras_to_tf_tpu_optimizer(optimizer)
     else:
         raise ValueError(
             f"Unknown optimizer type {type(optimizer)}. Please pass an "
@@ -312,7 +319,7 @@ def translate_optimizer(
 # TensorFlow to TensorFlow


-def clone_tf_feature_configs(
+def clone_tf_tpu_feature_configs(
     feature_configs: types.Nested[tf.tpu.experimental.embedding.FeatureConfig],
 ) -> types.Nested[tf.tpu.experimental.embedding.FeatureConfig]:
     """Clones and resolves TensorFlow TPU feature configs.
@@ -327,7 +334,7 @@ def clone_tf_feature_configs(
     """
     table_configs_dict = {}

-    def
+    def clone_and_resolve_tf_tpu_feature_config(
         fc: tf.tpu.experimental.embedding.FeatureConfig,
     ) -> tf.tpu.experimental.embedding.FeatureConfig:
         if fc.table not in table_configs_dict:
@@ -336,7 +343,7 @@ def clone_tf_feature_configs(
                 vocabulary_size=fc.table.vocabulary_size,
                 dim=fc.table.dim,
                 initializer=fc.table.initializer,
-                optimizer=
+                optimizer=to_tf_tpu_optimizer(fc.table.optimizer),
                 combiner=fc.table.combiner,
                 name=fc.table.name,
                 quantization_config=fc.table.quantization_config,
@@ -352,5 +359,5 @@ def clone_tf_feature_configs(
     )

     return keras.tree.map_structure(
-
+        clone_and_resolve_tf_tpu_feature_config, feature_configs
     )
```
```diff
--- a/keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py
+++ b/keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py
@@ -35,8 +35,15 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
         table_stacking: (
             str | Sequence[str] | Sequence[Sequence[str]]
         ) = "auto",
+        update_stats: bool = False,
         **kwargs: Any,
     ) -> None:
+        # `update_stats` is supported only on JAX.
+        if update_stats:
+            raise ValueError(
+                "`update_stats` cannot be True for the TensorFlow backend."
+            )
+
         # Intercept arguments that are supported only on TensorFlow.
         self._optimizer = kwargs.pop("optimizer", None)
         self._pipeline_execution_with_tensor_core = kwargs.pop(
```
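On the TensorFlow backend, the new `update_stats` argument must stay `False`; passing `True` fails fast because the statistics-update path exists only in the JAX implementation. A rough usage sketch (the `keras_rs.layers.DistributedEmbedding` import path and the empty placeholder configs are assumptions, not taken from this diff):

```python
import keras_rs

feature_configs = {}  # placeholder; real code passes keras_rs FeatureConfig objects

try:
    layer = keras_rs.layers.DistributedEmbedding(
        feature_configs,
        update_stats=True,  # only supported on the JAX backend
    )
except ValueError as err:
    print(err)  # `update_stats` cannot be True for the TensorFlow backend.
```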
```diff
@@ -106,7 +113,7 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
                 "for the configuration."
             )
         self._tpu_feature_configs, self._sparse_core_embedding_config = (
-            config_conversion.
+            config_conversion.keras_to_tf_tpu_configuration(
                 feature_configs,
                 table_stacking,
                 strategy.num_replicas_in_sync,
@@ -135,10 +142,10 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
                 "supported with this TPU generation."
             )
         self._tpu_feature_configs = (
-            config_conversion.
+            config_conversion.clone_tf_tpu_feature_configs(feature_configs)
         )

-        self._tpu_optimizer = config_conversion.
+        self._tpu_optimizer = config_conversion.to_tf_tpu_optimizer(
             self._optimizer
         )

@@ -281,8 +288,18 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
     def _sparsecore_get_embedding_tables(self) -> dict[str, types.Tensor]:
         tables: dict[str, types.Tensor] = {}
         strategy = tf.distribute.get_strategy()
-
-
+        if not self._is_tpu_strategy(strategy):
+            raise RuntimeError(
+                "`DistributedEmbedding.get_embedding_tables` needs to be "
+                "called under the TPUStrategy that DistributedEmbedding was "
+                f"created with, but is being called under strategy {strategy}. "
+                "Please use `with strategy.scope()` when calling "
+                "`get_embedding_tables`."
+            )
+
+        tpu_hardware = strategy.extended.tpu_hardware_feature
+        num_sc_per_device = tpu_hardware.num_embedding_devices_per_chip
+        num_shards = strategy.num_replicas_in_sync * num_sc_per_device

         def populate_table(
             feature_config: tf.tpu.experimental.embedding.FeatureConfig,
```