keras-rs-nightly: 0.3.1.dev202510060327.tar.gz → 0.3.1.dev202510080323.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of keras-rs-nightly has been flagged as potentially problematic.
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/PKG-INFO +1 -1
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/distributed_embedding.py +34 -33
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/embedding_utils.py +3 -110
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/version.py +1 -1
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs_nightly.egg-info/PKG-INFO +1 -1
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/pyproject.toml +1 -1
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/README.md +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/api/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/api/layers/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/api/losses/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/api/metrics/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/api_export.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/base_distributed_embedding.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/distributed_embedding.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/distributed_embedding_config.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/embed_reduce.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/checkpoint_utils.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/config_conversion.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/embedding_lookup.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/tensorflow/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/tensorflow/config_conversion.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/feature_interaction/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/feature_interaction/dot_interaction.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/feature_interaction/feature_cross.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/retrieval/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/retrieval/brute_force_retrieval.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/retrieval/hard_negative_mining.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/retrieval/remove_accidental_hits.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/retrieval/retrieval.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/retrieval/sampling_probability_correction.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/pairwise_hinge_loss.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/pairwise_logistic_loss.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/pairwise_loss.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/pairwise_loss_utils.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/pairwise_mean_squared_error.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/losses/pairwise_soft_zero_one_loss.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/dcg.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/mean_average_precision.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/mean_reciprocal_rank.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/ndcg.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/precision_at_k.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/ranking_metric.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/ranking_metrics_utils.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/recall_at_k.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/metrics/utils.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/types.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/utils/__init__.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/utils/doc_string_utils.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/utils/keras_utils.py +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs_nightly.egg-info/SOURCES.txt +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs_nightly.egg-info/dependency_links.txt +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs_nightly.egg-info/requires.txt +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs_nightly.egg-info/top_level.txt +0 -0
- {keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/setup.cfg +0 -0
{keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/distributed_embedding.py
RENAMED
@@ -441,7 +441,7 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
         )

         # Collect all stacked tables.
-        table_specs =
+        table_specs = embedding.get_table_specs(feature_specs)
         table_stacks = embedding_utils.get_table_stacks(table_specs)

         # Create variables for all stacked tables and slot variables.
@@ -515,9 +515,7 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
         del inputs, weights, training

         # Each stacked-table gets a ShardedCooMatrix.
-        table_specs =
-            self._config.feature_specs
-        )
+        table_specs = embedding.get_table_specs(self._config.feature_specs)
         table_stacks = embedding_utils.get_table_stacks(table_specs)
         stacked_table_specs = {
             stack_name: stack[0].stacked_table_spec
@@ -600,40 +598,43 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):
         if training:
             # Synchronize input statistics across all devices and update the
             # underlying stacked tables specs in the feature specs.
-            prev_stats = embedding_utils.get_stacked_table_stats(
-                self._config.feature_specs
-            )

-            #
-
+            # Aggregate stats across all processes/devices via pmax.
+            num_local_cpu_devices = jax.local_device_count("cpu")

-
-
-
-
+            def pmax_aggregate(x: Any) -> Any:
+                if not hasattr(x, "ndim"):
+                    x = np.array(x)
+                tiled_x = np.tile(x, (num_local_cpu_devices, *([1] * x.ndim)))
+                return jax.pmap(
+                    lambda y: jax.lax.pmax(y, "all_cpus"),  # type: ignore[no-untyped-call]
+                    axis_name="all_cpus",
+                    backend="cpu",
+                )(tiled_x)[0]

-
-            # replicate the stats to placate JAX collectives.
-            num_local_cpu_devices = jax.local_device_count("cpu")
-            tiled_stats = np.tile(
-                np.array(flat_stats, dtype=np.int32), (num_local_cpu_devices, 1)
-            )
+            full_stats = jax.tree.map(pmax_aggregate, stats)

-            #
-
-
-
-
-
-
+            # Check if stats changed enough to warrant action.
+            stacked_table_specs = embedding.get_stacked_table_specs(
+                self._config.feature_specs
+            )
+            changed = any(
+                np.max(full_stats.max_ids_per_partition[stack_name])
+                > spec.max_ids_per_partition
+                or np.max(full_stats.max_unique_ids_per_partition[stack_name])
+                > spec.max_unique_ids_per_partition
+                or (
+                    np.max(full_stats.required_buffer_size_per_sc[stack_name])
+                    * num_sc_per_device
+                )
+                > (spec.suggested_coo_buffer_size_per_device or 0)
+                for stack_name, spec in stacked_table_specs.items()
             )
-            flat_stats = max_across_cpus(tiled_stats)[0].tolist()
-            stats = jax.tree.unflatten(stats_treedef, flat_stats)

             # Update configuration and repeat preprocessing if stats changed.
-            if
-
-                self._config.feature_specs,
+            if changed:
+                embedding.update_preprocessing_parameters(
+                    self._config.feature_specs, full_stats, num_sc_per_device
                 )

             # Re-execute preprocessing with consistent input statistics.
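Note: the hunk above replaces the old flatten/tile/unflatten synchronization with a per-leaf pmax reduction over local CPU devices. The following is a minimal, self-contained sketch of that pattern, not the library's exact code; the stats dict and table name below are made up for illustration.

import jax
import numpy as np

def pmax_aggregate(x, axis_name="all_cpus"):
    # jax.pmap requires a leading axis equal to the local device count, so the
    # per-host value is replicated across local CPU devices and then reduced
    # element-wise with jax.lax.pmax. In a multi-host run this yields the
    # global maximum on every participant.
    x = np.asarray(x)
    n = jax.local_device_count("cpu")
    tiled = np.tile(x, (n, *([1] * x.ndim)))
    reduced = jax.pmap(
        lambda y: jax.lax.pmax(y, axis_name),
        axis_name=axis_name,
        backend="cpu",
    )(tiled)
    return reduced[0]

# Hypothetical per-table statistics gathered on this host.
local_stats = {"stack_0": np.array([7, 3, 11], dtype=np.int32)}
global_stats = jax.tree.map(pmax_aggregate, local_stats)
print(global_stats)  # element-wise max across participating devices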
@@ -718,7 +719,7 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):

         config = self._config
         num_table_shards = config.mesh.devices.size * config.num_sc_per_device
-        table_specs =
+        table_specs = embedding.get_table_specs(config.feature_specs)
         sharded_tables = embedding_utils.stack_and_shard_tables(
             table_specs,
             tables,
@@ -750,7 +751,7 @@ class DistributedEmbedding(base_distributed_embedding.DistributedEmbedding):

         config = self._config
         num_table_shards = config.mesh.devices.size * config.num_sc_per_device
-        table_specs =
+        table_specs = embedding.get_table_specs(config.feature_specs)

         # Extract only the table variables, not the gradient slot variables.
         table_variables = {
{keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/keras_rs/src/layers/embedding/jax/embedding_utils.py
RENAMED
@@ -1,7 +1,6 @@
 """Utility functions for manipulating JAX embedding tables and inputs."""

 import collections
-import dataclasses
 import typing
 from typing import Any, Mapping, NamedTuple, Sequence, TypeAlias, TypeVar

@@ -35,12 +34,6 @@ class ShardedCooMatrix(NamedTuple):
     values: ArrayLike


-class InputStatsPerTable(NamedTuple):
-    max_ids_per_partition: int
-    max_unique_ids_per_partition: int
-    required_buffer_size_per_device: int
-
-
 def _round_up_to_multiple(value: int, multiple: int) -> int:
     return ((value + multiple - 1) // multiple) * multiple

@@ -303,15 +296,6 @@ def unshard_and_unstack_tables(
     return output


-def get_table_specs(feature_specs: Nested[FeatureSpec]) -> dict[str, TableSpec]:
-    table_spec_map: dict[str, TableSpec] = {}
-    flat_feature_specs, _ = jax.tree.flatten(feature_specs)
-    for feature_spec in flat_feature_specs:
-        table_spec = feature_spec.table_spec
-        table_spec_map[table_spec.name] = table_spec
-    return table_spec_map
-
-
 def get_table_stacks(
     table_specs: Nested[TableSpec],
 ) -> dict[str, list[TableSpec]]:
@@ -341,84 +325,6 @@ def get_table_stacks(
     return stacked_table_specs


-def get_stacked_table_stats(
-    feature_specs: Nested[FeatureSpec],
-) -> dict[str, InputStatsPerTable]:
-    """Extracts the stacked-table input statistics from the feature specs.
-
-    Args:
-        feature_specs: Feature specs from which to extracts the statistics.
-
-    Returns:
-        A mapping of stacked table names to input statistics per table.
-    """
-    stacked_table_specs: dict[str, StackedTableSpec] = {}
-    for feature_spec in jax.tree.flatten(feature_specs)[0]:
-        feature_spec = typing.cast(FeatureSpec, feature_spec)
-        stacked_table_spec = typing.cast(
-            StackedTableSpec, feature_spec.table_spec.stacked_table_spec
-        )
-        stacked_table_specs[stacked_table_spec.stack_name] = stacked_table_spec
-
-    stats: dict[str, InputStatsPerTable] = {}
-    for stacked_table_spec in stacked_table_specs.values():
-        buffer_size = stacked_table_spec.suggested_coo_buffer_size_per_device
-        buffer_size = buffer_size or 0
-        stats[stacked_table_spec.stack_name] = InputStatsPerTable(
-            max_ids_per_partition=stacked_table_spec.max_ids_per_partition,
-            max_unique_ids_per_partition=stacked_table_spec.max_unique_ids_per_partition,
-            required_buffer_size_per_device=buffer_size,
-        )
-
-    return stats
-
-
-def update_stacked_table_stats(
-    feature_specs: Nested[FeatureSpec],
-    stats: Mapping[str, InputStatsPerTable],
-) -> None:
-    """Updates stacked-table input properties in the supplied feature specs.
-
-    Args:
-        feature_specs: Feature specs to update in-place.
-        stats: Per-stacked-table input statistics.
-    """
-    # Collect table specs and stacked table specs.
-    table_specs: dict[str, TableSpec] = {}
-    for feature_spec in jax.tree.flatten(feature_specs)[0]:
-        feature_spec = typing.cast(FeatureSpec, feature_spec)
-        table_specs[feature_spec.table_spec.name] = feature_spec.table_spec
-
-    stacked_table_specs: dict[str, StackedTableSpec] = {}
-    for table_spec in table_specs.values():
-        stacked_table_spec = typing.cast(
-            StackedTableSpec, table_spec.stacked_table_spec
-        )
-        stacked_table_specs[stacked_table_spec.stack_name] = stacked_table_spec
-
-    # Replace fields in the stacked_table_specs.
-    stack_names = stacked_table_specs.keys()
-    for stack_name in stack_names:
-        stack_stats = stats[stack_name]
-        stacked_table_spec = stacked_table_specs[stack_name]
-        buffer_size = stack_stats.required_buffer_size_per_device or None
-        stacked_table_specs[stack_name] = dataclasses.replace(
-            stacked_table_spec,
-            max_ids_per_partition=stack_stats.max_ids_per_partition,
-            max_unique_ids_per_partition=stack_stats.max_unique_ids_per_partition,
-            suggested_coo_buffer_size_per_device=buffer_size,
-        )
-
-    # Insert new stacked tables into tables.
-    for table_spec in table_specs.values():
-        stacked_table_spec = typing.cast(
-            StackedTableSpec, table_spec.stacked_table_spec
-        )
-        table_spec.stacked_table_spec = stacked_table_specs[
-            stacked_table_spec.stack_name
-        ]
-
-
 def convert_to_numpy(
     ragged_or_dense: np.ndarray[Any, Any] | Sequence[Sequence[Any]] | Any,
     dtype: Any,
@@ -483,7 +389,7 @@ def ones_like(

     Args:
         ragged_or_dense: The ragged or dense input whose shape and data-type
-
+            define these same attributes of the returned array.
         dtype: The data-type of the returned array.

     Returns:
@@ -567,7 +473,7 @@ def stack_and_shard_samples(
     global_device_count: int,
     num_sc_per_device: int,
     static_buffer_size: int | Mapping[str, int] | None = None,
-) -> tuple[dict[str, ShardedCooMatrix],
+) -> tuple[dict[str, ShardedCooMatrix], embedding.SparseDenseMatmulInputStats]:
     """Prepares input samples for use in embedding lookups.

     Args:
@@ -612,7 +518,6 @@ def stack_and_shard_samples(
     )

     out: dict[str, ShardedCooMatrix] = {}
-    out_stats: dict[str, InputStatsPerTable] = {}
     tables_names = preprocessed_inputs.lhs_row_pointers.keys()
     for table_name in tables_names:
         shard_ends = preprocessed_inputs.lhs_row_pointers[table_name]
@@ -626,17 +531,5 @@ def stack_and_shard_samples(
             row_ids=preprocessed_inputs.lhs_sample_ids[table_name],
             values=preprocessed_inputs.lhs_gains[table_name],
         )
-        out_stats[table_name] = InputStatsPerTable(
-            max_ids_per_partition=np.max(
-                stats.max_ids_per_partition[table_name]
-            ),
-            max_unique_ids_per_partition=np.max(
-                stats.max_unique_ids_per_partition[table_name]
-            ),
-            required_buffer_size_per_device=np.max(
-                stats.required_buffer_size_per_sc[table_name]
-            )
-            * num_sc_per_device,
-        )

-    return out,
+    return out, stats
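Note: with the hunk above, stack_and_shard_samples returns the raw stats object instead of pre-reduced per-table InputStatsPerTable values. A caller-side sketch of recovering the old per-table numbers follows; FakeStats and its contents are hypothetical stand-ins for the array-valued fields that appear in this diff.

import numpy as np

# Hypothetical stand-in for the stats returned by stack_and_shard_samples:
# each field maps a stacked-table name to an array with one entry per partition/SparseCore.
class FakeStats:
    max_ids_per_partition = {"stack_0": np.array([12, 9, 15, 7])}
    max_unique_ids_per_partition = {"stack_0": np.array([8, 6, 10, 5])}
    required_buffer_size_per_sc = {"stack_0": np.array([256, 192, 320, 128])}

num_sc_per_device = 4
stats = FakeStats()
for name in stats.max_ids_per_partition:
    # Reduce the per-partition arrays to the scalars the old helper produced.
    max_ids = int(np.max(stats.max_ids_per_partition[name]))
    max_unique = int(np.max(stats.max_unique_ids_per_partition[name]))
    buffer_per_device = int(np.max(stats.required_buffer_size_per_sc[name])) * num_sc_per_device
    print(name, max_ids, max_unique, buffer_per_device)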
{keras_rs_nightly-0.3.1.dev202510060327 → keras_rs_nightly-0.3.1.dev202510080323}/pyproject.toml
RENAMED
@@ -64,7 +64,7 @@ known-first-party = ["keras_rs"]
 [tool.mypy]
 python_version = "3.10"
 strict = "True"
-exclude = ["_test\\.py$", "^examples/"]
+exclude = ["_test\\.py$", "^examples/", "venv/"]
 untyped_calls_exclude = ["ml_dtypes"]
 disable_error_code = ["import-untyped", "unused-ignore"]
 disallow_subclassing_any = "False"
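Note: mypy's exclude entries are regular expressions matched against file paths. The quick sketch below uses plain Python re as an approximation of that matching (not mypy's own matcher) to show how the added "venv/" pattern filters out a local virtual environment.

import re

# mypy applies each exclude regex to relative file paths with a search-style match.
patterns = [r"_test\.py$", r"^examples/", r"venv/"]

def excluded(path: str) -> bool:
    return any(re.search(p, path) for p in patterns)

print(excluded("venv/lib/python3.10/site-packages/foo.py"))  # True
print(excluded("keras_rs/src/version.py"))                   # False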