keras-rs-nightly 0.2.2.dev202509030321__tar.gz → 0.2.2.dev202509170322__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)
  1. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/PKG-INFO +4 -3
  2. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/base_distributed_embedding.py +12 -10
  3. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/distributed_embedding_config.py +2 -2
  4. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/jax/distributed_embedding.py +41 -174
  5. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/jax/embedding_utils.py +68 -22
  6. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/tensorflow/config_conversion.py +26 -19
  7. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py +15 -5
  8. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/version.py +1 -1
  9. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs_nightly.egg-info/PKG-INFO +4 -3
  10. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/pyproject.toml +3 -2
  11. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/README.md +0 -0
  12. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/api/__init__.py +0 -0
  13. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/api/layers/__init__.py +0 -0
  14. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/api/losses/__init__.py +0 -0
  15. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/api/metrics/__init__.py +0 -0
  16. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/__init__.py +0 -0
  17. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/api_export.py +0 -0
  18. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/__init__.py +0 -0
  19. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/__init__.py +0 -0
  20. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/distributed_embedding.py +0 -0
  21. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/embed_reduce.py +0 -0
  22. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/jax/__init__.py +0 -0
  23. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/jax/checkpoint_utils.py +0 -0
  24. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/jax/config_conversion.py +0 -0
  25. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/jax/embedding_lookup.py +0 -0
  26. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/embedding/tensorflow/__init__.py +0 -0
  27. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/feature_interaction/__init__.py +0 -0
  28. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/feature_interaction/dot_interaction.py +0 -0
  29. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/feature_interaction/feature_cross.py +0 -0
  30. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/retrieval/__init__.py +0 -0
  31. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/retrieval/brute_force_retrieval.py +0 -0
  32. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/retrieval/hard_negative_mining.py +0 -0
  33. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/retrieval/remove_accidental_hits.py +0 -0
  34. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/retrieval/retrieval.py +0 -0
  35. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/layers/retrieval/sampling_probability_correction.py +0 -0
  36. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/__init__.py +0 -0
  37. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/pairwise_hinge_loss.py +0 -0
  38. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/pairwise_logistic_loss.py +0 -0
  39. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/pairwise_loss.py +0 -0
  40. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/pairwise_loss_utils.py +0 -0
  41. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/pairwise_mean_squared_error.py +0 -0
  42. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/losses/pairwise_soft_zero_one_loss.py +0 -0
  43. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/__init__.py +0 -0
  44. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/dcg.py +0 -0
  45. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/mean_average_precision.py +0 -0
  46. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/mean_reciprocal_rank.py +0 -0
  47. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/ndcg.py +0 -0
  48. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/precision_at_k.py +0 -0
  49. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/ranking_metric.py +0 -0
  50. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/ranking_metrics_utils.py +0 -0
  51. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/recall_at_k.py +0 -0
  52. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/metrics/utils.py +0 -0
  53. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/types.py +0 -0
  54. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/utils/__init__.py +0 -0
  55. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/utils/doc_string_utils.py +0 -0
  56. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs/src/utils/keras_utils.py +0 -0
  57. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs_nightly.egg-info/SOURCES.txt +0 -0
  58. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs_nightly.egg-info/dependency_links.txt +0 -0
  59. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs_nightly.egg-info/requires.txt +0 -0
  60. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/keras_rs_nightly.egg-info/top_level.txt +0 -0
  61. {keras_rs_nightly-0.2.2.dev202509030321 → keras_rs_nightly-0.2.2.dev202509170322}/setup.cfg +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: keras-rs-nightly
- Version: 0.2.2.dev202509030321
+ Version: 0.2.2.dev202509170322
  Summary: Multi-backend recommender systems with Keras 3.
  Author-email: Keras team <keras-users@googlegroups.com>
  License: Apache License 2.0
@@ -8,8 +8,9 @@ Project-URL: Home, https://keras.io/keras_rs
  Project-URL: Repository, https://github.com/keras-team/keras-rs
  Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3 :: Only
  Classifier: Operating System :: Unix
  Classifier: Operating System :: Microsoft :: Windows
@@ -17,7 +18,7 @@ Classifier: Operating System :: MacOS
  Classifier: Intended Audience :: Science/Research
  Classifier: Topic :: Scientific/Engineering
  Classifier: Topic :: Software Development
- Requires-Python: >=3.10
+ Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  Requires-Dist: keras
  Requires-Dist: ml-dtypes

keras_rs/src/layers/embedding/base_distributed_embedding.py
@@ -822,13 +822,13 @@ class DistributedEmbedding(keras.layers.Layer):
  table_stacking: str | Sequence[Sequence[str]],
  ) -> None:
  del table_stacking
- table_to_embedding_layer: dict[TableConfig, EmbedReduce] = {}
+ table_config_id_to_embedding_layer: dict[int, EmbedReduce] = {}
  self._default_device_embedding_layers: dict[str, EmbedReduce] = {}

  for path, feature_config in feature_configs.items():
- if feature_config.table in table_to_embedding_layer:
+ if id(feature_config.table) in table_config_id_to_embedding_layer:
  self._default_device_embedding_layers[path] = (
- table_to_embedding_layer[feature_config.table]
+ table_config_id_to_embedding_layer[id(feature_config.table)]
  )
  else:
  embedding_layer = EmbedReduce(
@@ -838,7 +838,9 @@ class DistributedEmbedding(keras.layers.Layer):
  embeddings_initializer=feature_config.table.initializer,
  combiner=feature_config.table.combiner,
  )
- table_to_embedding_layer[feature_config.table] = embedding_layer
+ table_config_id_to_embedding_layer[id(feature_config.table)] = (
+ embedding_layer
+ )
  self._default_device_embedding_layers[path] = embedding_layer

  def _default_device_build(
@@ -1013,8 +1015,8 @@ class DistributedEmbedding(keras.layers.Layer):

  # The serialized `TableConfig` objects.
  table_config_dicts: list[dict[str, Any]] = []
- # Mapping from `TableConfig` to index in `table_config_dicts`.
- table_config_indices: dict[TableConfig, int] = {}
+ # Mapping from `TableConfig` id to index in `table_config_dicts`.
+ table_config_id_to_index: dict[int, int] = {}

  def serialize_feature_config(
  feature_config: FeatureConfig,
@@ -1024,17 +1026,17 @@ class DistributedEmbedding(keras.layers.Layer):
  # key.
  feature_config_dict = feature_config.get_config()

- if feature_config.table not in table_config_indices:
+ if id(feature_config.table) not in table_config_id_to_index:
  # Save the serialized `TableConfig` the first time we see it and
  # remember its index.
- table_config_indices[feature_config.table] = len(
+ table_config_id_to_index[id(feature_config.table)] = len(
  table_config_dicts
  )
  table_config_dicts.append(feature_config_dict["table"])

  # Replace the serialized `TableConfig` with its index.
- feature_config_dict["table"] = table_config_indices[
- feature_config.table
+ feature_config_dict["table"] = table_config_id_to_index[
+ id(feature_config.table)
  ]
  return feature_config_dict


keras_rs/src/layers/embedding/distributed_embedding_config.py
@@ -10,7 +10,7 @@ from keras_rs.src.api_export import keras_rs_export


  @keras_rs_export("keras_rs.layers.TableConfig")
- @dataclasses.dataclass(eq=True, unsafe_hash=True, order=True)
+ @dataclasses.dataclass(order=True)
  class TableConfig:
  """Configuration for one embedding table.

@@ -88,7 +88,7 @@ class TableConfig:


  @keras_rs_export("keras_rs.layers.FeatureConfig")
- @dataclasses.dataclass(eq=True, unsafe_hash=True, order=True)
+ @dataclasses.dataclass(order=True)
  class FeatureConfig:
  """Configuration for one embedding feature.

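Note: with `eq=True, unsafe_hash=True` removed, `dataclasses.dataclass(order=True)` keeps `eq=True` at its default, which sets `__hash__` to `None`, so `TableConfig` and `FeatureConfig` instances can no longer be used as dict keys. That is consistent with the dicts elsewhere in this release switching to keys of `id(...)`. A minimal sketch of the pattern, using a simplified stand-in class rather than the real `keras_rs.layers.TableConfig`:

import dataclasses

@dataclasses.dataclass(order=True)
class TableConfig:  # simplified stand-in, not the real keras_rs class
    name: str
    vocabulary_size: int
    embedding_dim: int

config = TableConfig("movies", 1000, 16)

# eq=True (the default) without unsafe_hash=True sets __hash__ to None,
# so using the instance as a dict key raises TypeError.
try:
    {config: "layer"}
except TypeError as e:
    print(e)  # unhashable type: 'TableConfig'

# Keying on id(config) dedupes by object identity instead, which is the
# approach the embedding layers now use to share one layer per config.
layers_by_config_id = {id(config): "shared_embed_reduce_layer"}
print(id(config) in layers_by_config_id)  # True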

keras_rs/src/layers/embedding/jax/distributed_embedding.py
@@ -15,7 +15,6 @@ from jax_tpu_embedding.sparsecore.lib.nn import (
  table_stacking as jte_table_stacking,
  )
  from jax_tpu_embedding.sparsecore.utils import utils as jte_utils
- from keras.src import backend

  from keras_rs.src import types
  from keras_rs.src.layers.embedding import base_distributed_embedding
@@ -247,23 +246,6 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  )
  return sparsecore_distribution, sparsecore_layout

- def _create_cpu_distribution(
- self, cpu_axis_name: str = "cpu"
- ) -> tuple[
- keras.distribution.ModelParallel, keras.distribution.TensorLayout
- ]:
- """Share a variable across all CPU processes."""
- cpu_devices = jax.devices("cpu")
- device_mesh = keras.distribution.DeviceMesh(
- (len(cpu_devices),), [cpu_axis_name], cpu_devices
- )
- replicated_layout = keras.distribution.TensorLayout([], device_mesh)
- layout_map = keras.distribution.LayoutMap(device_mesh=device_mesh)
- cpu_distribution = keras.distribution.ModelParallel(
- layout_map=layout_map
- )
- return cpu_distribution, replicated_layout
-
  def _add_sparsecore_weight(
  self,
  name: str,
@@ -405,11 +387,6 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  self._sparsecore_layout = sparsecore_layout
  self._sparsecore_distribution = sparsecore_distribution

- # Distribution for CPU operations.
- cpu_distribution, cpu_layout = self._create_cpu_distribution()
- self._cpu_distribution = cpu_distribution
- self._cpu_layout = cpu_layout
-
  mesh = sparsecore_distribution.device_mesh.backend_mesh
  global_device_count = mesh.devices.size
  num_sc_per_device = jte_utils.num_sparsecores_per_device(
@@ -466,10 +443,6 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  # Collect all stacked tables.
  table_specs = embedding_utils.get_table_specs(feature_specs)
  table_stacks = embedding_utils.get_table_stacks(table_specs)
- stacked_table_specs = {
- stack_name: stack[0].stacked_table_spec
- for stack_name, stack in table_stacks.items()
- }

  # Create variables for all stacked tables and slot variables.
  with sparsecore_distribution.scope():
@@ -502,50 +475,6 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  )
  self._iterations.overwrite_with_gradient = True

- with cpu_distribution.scope():
- # Create variables to track static buffer size and max IDs for each
- # table during preprocessing. These variables are shared across all
- # processes on CPU. We don't add these via `add_weight` because we
- # can't have them passed to the training function.
- replicated_zeros_initializer = ShardedInitializer(
- "zeros", cpu_layout
- )
-
- with backend.name_scope(self.name, caller=self):
- self._preprocessing_buffer_size = {
- table_name: backend.Variable(
- initializer=replicated_zeros_initializer,
- shape=(),
- dtype=backend.standardize_dtype("int32"),
- trainable=False,
- name=table_name + ":preprocessing:buffer_size",
- )
- for table_name in stacked_table_specs.keys()
- }
- self._preprocessing_max_unique_ids_per_partition = {
- table_name: backend.Variable(
- shape=(),
- name=table_name
- + ":preprocessing:max_unique_ids_per_partition",
- initializer=replicated_zeros_initializer,
- dtype=backend.standardize_dtype("int32"),
- trainable=False,
- )
- for table_name in stacked_table_specs.keys()
- }
-
- self._preprocessing_max_ids_per_partition = {
- table_name: backend.Variable(
- shape=(),
- name=table_name
- + ":preprocessing:max_ids_per_partition",
- initializer=replicated_zeros_initializer,
- dtype=backend.standardize_dtype("int32"),
- trainable=False,
- )
- for table_name in stacked_table_specs.keys()
- }
-
  self._config = jte_embedding_lookup.EmbeddingLookupConfiguration(
  feature_specs,
  mesh=mesh,
@@ -660,76 +589,35 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  mesh.devices.item(0)
  )

- # Get current buffer size/max_ids.
- previous_max_ids_per_partition = keras.tree.map_structure(
- lambda max_ids_per_partition: max_ids_per_partition.value.item(),
- self._preprocessing_max_ids_per_partition,
- )
- previous_max_unique_ids_per_partition = keras.tree.map_structure(
- lambda max_unique_ids_per_partition: (
- max_unique_ids_per_partition.value.item()
- ),
- self._preprocessing_max_unique_ids_per_partition,
- )
- previous_buffer_size = keras.tree.map_structure(
- lambda buffer_size: buffer_size.value.item(),
- self._preprocessing_buffer_size,
- )
-
  preprocessed, stats = embedding_utils.stack_and_shard_samples(
  self._config.feature_specs,
  samples,
  local_device_count,
  global_device_count,
  num_sc_per_device,
- static_buffer_size=previous_buffer_size,
  )

- # Extract max unique IDs and buffer sizes.
- # We need to replicate this value across all local CPU devices.
  if training:
+ # Synchronize input statistics across all devices and update the
+ # underlying stacked tables specs in the feature specs.
+ prev_stats = embedding_utils.get_stacked_table_stats(
+ self._config.feature_specs
+ )
+
+ # Take the maximum with existing stats.
+ stats = keras.tree.map_structure(max, prev_stats, stats)
+
+ # Flatten the stats so we can more efficiently transfer them
+ # between hosts. We use jax.tree because we will later need to
+ # unflatten.
+ flat_stats, stats_treedef = jax.tree.flatten(stats)
+
+ # In the case of multiple local CPU devices per host, we need to
+ # replicate the stats to placate JAX collectives.
  num_local_cpu_devices = jax.local_device_count("cpu")
- local_max_ids_per_partition = {
- table_name: np.repeat(
- # Maximum across all partitions and previous max.
- np.maximum(
- np.max(elems),
- previous_max_ids_per_partition[table_name],
- ),
- num_local_cpu_devices,
- )
- for table_name, elems in stats.max_ids_per_partition.items()
- }
- local_max_unique_ids_per_partition = {
- name: np.repeat(
- # Maximum across all partitions and previous max.
- np.maximum(
- np.max(elems),
- previous_max_unique_ids_per_partition[name],
- ),
- num_local_cpu_devices,
- )
- for name, elems in stats.max_unique_ids_per_partition.items()
- }
- local_buffer_size = {
- table_name: np.repeat(
- np.maximum(
- np.max(
- # Round values up to the next multiple of 8.
- # Currently using this as a proxy for the actual
- # required buffer size.
- ((elems + 7) // 8) * 8
- )
- * global_device_count
- * num_sc_per_device
- * local_device_count
- * num_sc_per_device,
- previous_buffer_size[table_name],
- ),
- num_local_cpu_devices,
- )
- for table_name, elems in stats.max_ids_per_partition.items()
- }
+ tiled_stats = np.tile(
+ np.array(flat_stats, dtype=np.int32), (num_local_cpu_devices, 1)
+ )

  # Aggregate variables across all processes/devices.
  max_across_cpus = jax.pmap(
@@ -737,48 +625,24 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  x, "all_cpus"
  ),
  axis_name="all_cpus",
- devices=self._cpu_layout.device_mesh.backend_mesh.devices,
+ backend="cpu",
  )
- new_max_ids_per_partition = max_across_cpus(
- local_max_ids_per_partition
- )
- new_max_unique_ids_per_partition = max_across_cpus(
- local_max_unique_ids_per_partition
- )
- new_buffer_size = max_across_cpus(local_buffer_size)
-
- # Assign new preprocessing parameters.
- with self._cpu_distribution.scope():
- # For each process, all max ids/buffer sizes are replicated
- # across all local devices. Take the value from the first
- # device.
- keras.tree.map_structure(
- lambda var, values: var.assign(values[0]),
- self._preprocessing_max_ids_per_partition,
- new_max_ids_per_partition,
- )
- keras.tree.map_structure(
- lambda var, values: var.assign(values[0]),
- self._preprocessing_max_unique_ids_per_partition,
- new_max_unique_ids_per_partition,
- )
- keras.tree.map_structure(
- lambda var, values: var.assign(values[0]),
- self._preprocessing_buffer_size,
- new_buffer_size,
- )
- # Update parameters in the underlying feature specs.
- int_max_ids_per_partition = keras.tree.map_structure(
- lambda varray: varray.item(), new_max_ids_per_partition
- )
- int_max_unique_ids_per_partition = keras.tree.map_structure(
- lambda varray: varray.item(),
- new_max_unique_ids_per_partition,
+ flat_stats = max_across_cpus(tiled_stats)[0].tolist()
+ stats = jax.tree.unflatten(stats_treedef, flat_stats)
+
+ # Update configuration and repeat preprocessing if stats changed.
+ if stats != prev_stats:
+ embedding_utils.update_stacked_table_stats(
+ self._config.feature_specs, stats
  )
- embedding_utils.update_stacked_table_specs(
+
+ # Re-execute preprocessing with consistent input statistics.
+ preprocessed, _ = embedding_utils.stack_and_shard_samples(
  self._config.feature_specs,
- int_max_ids_per_partition,
- int_max_unique_ids_per_partition,
+ samples,
+ local_device_count,
+ global_device_count,
+ num_sc_per_device,
  )

  return {"inputs": preprocessed}
@@ -826,19 +690,22 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  raise ValueError("Layer must first be built before setting tables.")

  if "default_device" in self._placement_to_path_to_feature_config:
- table_to_embedding_layer = {}
+ table_name_to_embedding_layer = {}
  for (
  path,
  feature_config,
  ) in self._placement_to_path_to_feature_config[
  "default_device"
  ].items():
- table_to_embedding_layer[feature_config.table] = (
+ table_name_to_embedding_layer[feature_config.table.name] = (
  self._default_device_embedding_layers[path]
  )

- for table, embedding_layer in table_to_embedding_layer.items():
- table_values = tables.get(table.name, None)
+ for (
+ table_name,
+ embedding_layer,
+ ) in table_name_to_embedding_layer.items():
+ table_values = tables.get(table_name, None)
  if table_values is not None:
  if embedding_layer.lora_enabled:
  raise ValueError("Cannot set table if LoRA is enabled.")

keras_rs/src/layers/embedding/jax/embedding_utils.py
@@ -35,6 +35,12 @@ class ShardedCooMatrix(NamedTuple):
  values: ArrayLike


+ class InputStatsPerTable(NamedTuple):
+ max_ids_per_partition: int
+ max_unique_ids_per_partition: int
+ required_buffer_size_per_device: int
+
+
  def _round_up_to_multiple(value: int, multiple: int) -> int:
  return ((value + multiple - 1) // multiple) * multiple

@@ -335,19 +341,47 @@ def get_table_stacks(
  return stacked_table_specs


- def update_stacked_table_specs(
+ def get_stacked_table_stats(
  feature_specs: Nested[FeatureSpec],
- max_ids_per_partition: Mapping[str, int],
- max_unique_ids_per_partition: Mapping[str, int],
+ ) -> dict[str, InputStatsPerTable]:
+ """Extracts the stacked-table input statistics from the feature specs.
+
+ Args:
+ feature_specs: Feature specs from which to extracts the statistics.
+
+ Returns:
+ A mapping of stacked table names to input statistics per table.
+ """
+ stacked_table_specs: dict[str, StackedTableSpec] = {}
+ for feature_spec in jax.tree.flatten(feature_specs)[0]:
+ feature_spec = typing.cast(FeatureSpec, feature_spec)
+ stacked_table_spec = typing.cast(
+ StackedTableSpec, feature_spec.table_spec.stacked_table_spec
+ )
+ stacked_table_specs[stacked_table_spec.stack_name] = stacked_table_spec
+
+ stats: dict[str, InputStatsPerTable] = {}
+ for stacked_table_spec in stacked_table_specs.values():
+ buffer_size = stacked_table_spec.suggested_coo_buffer_size_per_device
+ buffer_size = buffer_size or 0
+ stats[stacked_table_spec.stack_name] = InputStatsPerTable(
+ max_ids_per_partition=stacked_table_spec.max_ids_per_partition,
+ max_unique_ids_per_partition=stacked_table_spec.max_unique_ids_per_partition,
+ required_buffer_size_per_device=buffer_size,
+ )
+
+ return stats
+
+
+ def update_stacked_table_stats(
+ feature_specs: Nested[FeatureSpec],
+ stats: Mapping[str, InputStatsPerTable],
  ) -> None:
- """Updates properties in the supplied feature specs.
+ """Updates stacked-table input properties in the supplied feature specs.

  Args:
  feature_specs: Feature specs to update in-place.
- max_ids_per_partition: Mapping of table stack name to
- new `max_ids_per_partition` for the stack.
- max_unique_ids_per_partition: Mapping of table stack name to
- new `max_unique_ids_per_partition` for the stack.
+ stats: Per-stacked-table input statistics.
  """
  # Collect table specs and stacked table specs.
  table_specs: dict[str, TableSpec] = {}
@@ -363,18 +397,17 @@ def update_stacked_table_specs(
  stacked_table_specs[stacked_table_spec.stack_name] = stacked_table_spec

  # Replace fields in the stacked_table_specs.
- stacked_table_specs = {
- stack_name: dataclasses.replace(
+ stack_names = stacked_table_specs.keys()
+ for stack_name in stack_names:
+ stack_stats = stats[stack_name]
+ stacked_table_spec = stacked_table_specs[stack_name]
+ buffer_size = stack_stats.required_buffer_size_per_device or None
+ stacked_table_specs[stack_name] = dataclasses.replace(
  stacked_table_spec,
- max_ids_per_partition=max_ids_per_partition[
- stacked_table_spec.stack_name
- ],
- max_unique_ids_per_partition=max_unique_ids_per_partition[
- stacked_table_spec.stack_name
- ],
+ max_ids_per_partition=stack_stats.max_ids_per_partition,
+ max_unique_ids_per_partition=stack_stats.max_unique_ids_per_partition,
+ suggested_coo_buffer_size_per_device=buffer_size,
  )
- for stack_name, stacked_table_spec in stacked_table_specs.items()
- }

  # Insert new stacked tables into tables.
  for table_spec in table_specs.values():
@@ -534,7 +567,7 @@ def stack_and_shard_samples(
  global_device_count: int,
  num_sc_per_device: int,
  static_buffer_size: int | Mapping[str, int] | None = None,
- ) -> tuple[dict[str, ShardedCooMatrix], embedding.SparseDenseMatmulInputStats]:
+ ) -> tuple[dict[str, ShardedCooMatrix], dict[str, InputStatsPerTable]]:
  """Prepares input samples for use in embedding lookups.

  Args:
@@ -544,8 +577,8 @@ def stack_and_shard_samples(
  global_device_count: Number of global JAX devices.
  num_sc_per_device: Number of sparsecores per device.
  static_buffer_size: The static buffer size to use for the samples.
- Defaults to None, in which case an upper-bound for the buffer size
- will be automatically determined.
+ Defaults to None, in which case an upper-bound for the buffer size
+ will be automatically determined.

  Returns:
  The preprocessed inputs, and statistics useful for updating FeatureSpecs
@@ -579,6 +612,7 @@ def stack_and_shard_samples(
  )

  out: dict[str, ShardedCooMatrix] = {}
+ out_stats: dict[str, InputStatsPerTable] = {}
  tables_names = preprocessed_inputs.lhs_row_pointers.keys()
  for table_name in tables_names:
  shard_ends = preprocessed_inputs.lhs_row_pointers[table_name]
@@ -592,5 +626,17 @@ def stack_and_shard_samples(
  row_ids=preprocessed_inputs.lhs_sample_ids[table_name],
  values=preprocessed_inputs.lhs_gains[table_name],
  )
+ out_stats[table_name] = InputStatsPerTable(
+ max_ids_per_partition=np.max(
+ stats.max_ids_per_partition[table_name]
+ ),
+ max_unique_ids_per_partition=np.max(
+ stats.max_unique_ids_per_partition[table_name]
+ ),
+ required_buffer_size_per_device=np.max(
+ stats.required_buffer_size_per_sc[table_name]
+ )
+ * num_sc_per_device,
+ )

- return out, stats
+ return out, out_stats
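
Note: the new `InputStatsPerTable` tuple condenses the per-partition arrays returned by the underlying preprocessing into three per-table scalars. A rough sketch of that reduction, with invented partition arrays in place of the real `SparseDenseMatmulInputStats` values:

from typing import NamedTuple
import numpy as np

class InputStatsPerTable(NamedTuple):
    max_ids_per_partition: int
    max_unique_ids_per_partition: int
    required_buffer_size_per_device: int

num_sc_per_device = 4  # illustrative value

# Invented per-partition statistics for one stacked table.
max_ids = np.array([12, 31, 7, 18])
max_unique_ids = np.array([9, 22, 6, 14])
buffer_size_per_sc = np.array([256, 512, 256, 384])

stats = InputStatsPerTable(
    # Collapse per-partition arrays to a single per-table maximum.
    max_ids_per_partition=int(np.max(max_ids)),
    max_unique_ids_per_partition=int(np.max(max_unique_ids)),
    # Per-sparsecore buffer size scaled up to a per-device requirement.
    required_buffer_size_per_device=int(np.max(buffer_size_per_sc)) * num_sc_per_device,
)
print(stats)  # (31, 22, 2048)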

keras_rs/src/layers/embedding/tensorflow/config_conversion.py
@@ -53,7 +53,7 @@ OPTIMIZER_MAPPINGS = {
  # KerasRS to TensorFlow


- def translate_keras_rs_configuration(
+ def keras_to_tf_tpu_configuration(
  feature_configs: types.Nested[FeatureConfig],
  table_stacking: str | Sequence[str] | Sequence[Sequence[str]],
  num_replicas_in_sync: int,
@@ -66,14 +66,15 @@ def translate_keras_rs_configuration(
  Args:
  feature_configs: The nested Keras RS feature configs.
  table_stacking: The Keras RS table stacking.
+ num_replicas_in_sync: The number of replicas in sync from the strategy.

  Returns:
  A tuple containing the TensorFlow TPU feature configs and the TensorFlow
  TPU sparse core embedding config.
  """
- tables: dict[TableConfig, tf.tpu.experimental.embedding.TableConfig] = {}
+ tables: dict[int, tf.tpu.experimental.embedding.TableConfig] = {}
  feature_configs = keras.tree.map_structure(
- lambda f: translate_keras_rs_feature_config(
+ lambda f: keras_to_tf_tpu_feature_config(
  f, tables, num_replicas_in_sync
  ),
  feature_configs,
@@ -108,9 +109,9 @@ def translate_keras_rs_configuration(
  return feature_configs, sparse_core_embedding_config


- def translate_keras_rs_feature_config(
+ def keras_to_tf_tpu_feature_config(
  feature_config: FeatureConfig,
- tables: dict[TableConfig, tf.tpu.experimental.embedding.TableConfig],
+ tables: dict[int, tf.tpu.experimental.embedding.TableConfig],
  num_replicas_in_sync: int,
  ) -> tf.tpu.experimental.embedding.FeatureConfig:
  """Translates a Keras RS feature config to a TensorFlow TPU feature config.
@@ -120,7 +121,8 @@ def translate_keras_rs_feature_config(

  Args:
  feature_config: The Keras RS feature config to translate.
- tables: A mapping of KerasRS table configs to TF TPU table configs.
+ tables: A mapping of KerasRS table config ids to TF TPU table configs.
+ num_replicas_in_sync: The number of replicas in sync from the strategy.

  Returns:
  The TensorFlow TPU feature config.
@@ -131,10 +133,10 @@ def translate_keras_rs_feature_config(
  f"but got {num_replicas_in_sync}."
  )

- table = tables.get(feature_config.table, None)
+ table = tables.get(id(feature_config.table), None)
  if table is None:
- table = translate_keras_rs_table_config(feature_config.table)
- tables[feature_config.table] = table
+ table = keras_to_tf_tpu_table_config(feature_config.table)
+ tables[id(feature_config.table)] = table

  if len(feature_config.output_shape) < 2:
  raise ValueError(
@@ -168,7 +170,7 @@ def translate_keras_rs_feature_config(
  )


- def translate_keras_rs_table_config(
+ def keras_to_tf_tpu_table_config(
  table_config: TableConfig,
  ) -> tf.tpu.experimental.embedding.TableConfig:
  initializer = table_config.initializer
@@ -179,13 +181,13 @@ def translate_keras_rs_table_config(
  vocabulary_size=table_config.vocabulary_size,
  dim=table_config.embedding_dim,
  initializer=initializer,
- optimizer=translate_optimizer(table_config.optimizer),
+ optimizer=to_tf_tpu_optimizer(table_config.optimizer),
  combiner=table_config.combiner,
  name=table_config.name,
  )


- def translate_keras_optimizer(
+ def keras_to_tf_tpu_optimizer(
  optimizer: keras.optimizers.Optimizer,
  ) -> TfTpuOptimizer:
  """Translates a Keras optimizer to a TensorFlow TPU `_Optimizer`.
@@ -238,7 +240,12 @@ def translate_keras_optimizer(
  "Unsupported optimizer option `Optimizer.loss_scale_factor`."
  )

- optimizer_mapping = OPTIMIZER_MAPPINGS.get(type(optimizer), None)
+ optimizer_mapping = None
+ for optimizer_class, mapping in OPTIMIZER_MAPPINGS.items():
+ # Handle subclasses of the main optimizer class.
+ if isinstance(optimizer, optimizer_class):
+ optimizer_mapping = mapping
+ break
  if optimizer_mapping is None:
  raise ValueError(
  f"Unsupported optimizer type {type(optimizer)}. Optimizer must be "
@@ -258,7 +265,7 @@ def translate_keras_optimizer(
  return optimizer_mapping.tpu_optimizer_class(**tpu_optimizer_kwargs)


- def translate_optimizer(
+ def to_tf_tpu_optimizer(
  optimizer: str | keras.optimizers.Optimizer | TfTpuOptimizer | None,
  ) -> TfTpuOptimizer:
  """Translates a Keras optimizer into a TensorFlow TPU `_Optimizer`.
@@ -299,7 +306,7 @@ def translate_optimizer(
  "'sgd', 'adagrad', 'adam', or 'ftrl'"
  )
  elif isinstance(optimizer, keras.optimizers.Optimizer):
- return translate_keras_optimizer(optimizer)
+ return keras_to_tf_tpu_optimizer(optimizer)
  else:
  raise ValueError(
  f"Unknown optimizer type {type(optimizer)}. Please pass an "
@@ -312,7 +319,7 @@ def translate_optimizer(
  # TensorFlow to TensorFlow


- def clone_tf_feature_configs(
+ def clone_tf_tpu_feature_configs(
  feature_configs: types.Nested[tf.tpu.experimental.embedding.FeatureConfig],
  ) -> types.Nested[tf.tpu.experimental.embedding.FeatureConfig]:
  """Clones and resolves TensorFlow TPU feature configs.
@@ -327,7 +334,7 @@ def clone_tf_feature_configs(
  """
  table_configs_dict = {}

- def clone_and_resolve_tf_feature_config(
+ def clone_and_resolve_tf_tpu_feature_config(
  fc: tf.tpu.experimental.embedding.FeatureConfig,
  ) -> tf.tpu.experimental.embedding.FeatureConfig:
  if fc.table not in table_configs_dict:
@@ -336,7 +343,7 @@ def clone_tf_feature_configs(
  vocabulary_size=fc.table.vocabulary_size,
  dim=fc.table.dim,
  initializer=fc.table.initializer,
- optimizer=translate_optimizer(fc.table.optimizer),
+ optimizer=to_tf_tpu_optimizer(fc.table.optimizer),
  combiner=fc.table.combiner,
  name=fc.table.name,
  quantization_config=fc.table.quantization_config,
@@ -352,5 +359,5 @@ def clone_tf_feature_configs(
  )

  return keras.tree.map_structure(
- clone_and_resolve_tf_feature_config, feature_configs
+ clone_and_resolve_tf_tpu_feature_config, feature_configs
  )
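
Note: the optimizer lookup now walks `OPTIMIZER_MAPPINGS` with `isinstance` instead of an exact `type()` match, so subclasses of the supported Keras optimizers resolve to the same mapping. A minimal sketch of the difference, using a placeholder mapping dict and a hypothetical subclass rather than the real `OPTIMIZER_MAPPINGS`:

import keras

# Placeholder stand-in: base optimizer class -> mapping handle.
MAPPINGS = {
    keras.optimizers.SGD: "sgd_mapping",
    keras.optimizers.Adam: "adam_mapping",
}

class WarmupAdam(keras.optimizers.Adam):
    """A hypothetical Adam subclass a user might pass in."""

optimizer = WarmupAdam(learning_rate=1e-3)

# Old behaviour: exact type lookup misses subclasses.
print(MAPPINGS.get(type(optimizer)))  # None

# New behaviour: first isinstance match wins, so subclasses are handled.
mapping = None
for optimizer_class, candidate in MAPPINGS.items():
    if isinstance(optimizer, optimizer_class):
        mapping = candidate
        break
print(mapping)  # adam_mapping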

keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py
@@ -106,7 +106,7 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  "for the configuration."
  )
  self._tpu_feature_configs, self._sparse_core_embedding_config = (
- config_conversion.translate_keras_rs_configuration(
+ config_conversion.keras_to_tf_tpu_configuration(
  feature_configs,
  table_stacking,
  strategy.num_replicas_in_sync,
@@ -135,10 +135,10 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  "supported with this TPU generation."
  )
  self._tpu_feature_configs = (
- config_conversion.clone_tf_feature_configs(feature_configs)
+ config_conversion.clone_tf_tpu_feature_configs(feature_configs)
  )

- self._tpu_optimizer = config_conversion.translate_optimizer(
+ self._tpu_optimizer = config_conversion.to_tf_tpu_optimizer(
  self._optimizer
  )

@@ -281,8 +281,18 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
  def _sparsecore_get_embedding_tables(self) -> dict[str, types.Tensor]:
  tables: dict[str, types.Tensor] = {}
  strategy = tf.distribute.get_strategy()
- # 4 is the number of sparsecores per chip
- num_shards = strategy.num_replicas_in_sync * 4
+ if not self._is_tpu_strategy(strategy):
+ raise RuntimeError(
+ "`DistributedEmbedding.get_embedding_tables` needs to be "
+ "called under the TPUStrategy that DistributedEmbedding was "
+ f"created with, but is being called under strategy {strategy}. "
+ "Please use `with strategy.scope()` when calling "
+ "`get_embedding_tables`."
+ )
+
+ tpu_hardware = strategy.extended.tpu_hardware_feature
+ num_sc_per_device = tpu_hardware.num_embedding_devices_per_chip
+ num_shards = strategy.num_replicas_in_sync * num_sc_per_device

  def populate_table(
  feature_config: tf.tpu.experimental.embedding.FeatureConfig,
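
Note: the shard count for reassembling embedding tables is no longer hardcoded to 4 sparsecores per chip; it is read from the TPU hardware features reported by the strategy. A hedged sketch of that lookup as a standalone helper; it only produces a value when called with a real `tf.distribute.TPUStrategy` on TPU hardware, and the helper name is invented here:

import tensorflow as tf

def sparsecore_shard_count(strategy: tf.distribute.TPUStrategy) -> int:
    """Shard count: replicas in sync times sparsecores per chip."""
    if not isinstance(strategy, tf.distribute.TPUStrategy):
        raise RuntimeError(
            "Must be called under the TPUStrategy the layer was created with."
        )
    # Hardware feature reported by the TPU system, instead of a hardcoded 4.
    tpu_hardware = strategy.extended.tpu_hardware_feature
    num_sc_per_device = tpu_hardware.num_embedding_devices_per_chip
    return strategy.num_replicas_in_sync * num_sc_per_device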

keras_rs/src/version.py
@@ -1,7 +1,7 @@
  from keras_rs.src.api_export import keras_rs_export

  # Unique source of truth for the version number.
- __version__ = "0.2.2.dev202509030321"
+ __version__ = "0.2.2.dev202509170322"


  @keras_rs_export("keras_rs.version")

keras_rs_nightly.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: keras-rs-nightly
- Version: 0.2.2.dev202509030321
+ Version: 0.2.2.dev202509170322
  Summary: Multi-backend recommender systems with Keras 3.
  Author-email: Keras team <keras-users@googlegroups.com>
  License: Apache License 2.0
@@ -8,8 +8,9 @@ Project-URL: Home, https://keras.io/keras_rs
  Project-URL: Repository, https://github.com/keras-team/keras-rs
  Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3 :: Only
  Classifier: Operating System :: Unix
  Classifier: Operating System :: Microsoft :: Windows
@@ -17,7 +18,7 @@ Classifier: Operating System :: MacOS
  Classifier: Intended Audience :: Science/Research
  Classifier: Topic :: Scientific/Engineering
  Classifier: Topic :: Software Development
- Requires-Python: >=3.10
+ Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  Requires-Dist: keras
  Requires-Dist: ml-dtypes

pyproject.toml
@@ -9,14 +9,15 @@ authors = [
  ]
  description = "Multi-backend recommender systems with Keras 3."
  readme = "README.md"
- requires-python = ">=3.10"
+ requires-python = ">=3.11"
  license = {text = "Apache License 2.0"}
  dynamic = ["version"]
  classifiers = [
  "Development Status :: 3 - Alpha",
  "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
  "Programming Language :: Python :: 3 :: Only",
  "Operating System :: Unix",
  "Operating System :: Microsoft :: Windows",