PyPI - moscot - Versions diffs - 0.3.4__tar.gz → 0.4.0__tar.gz - Mend

moscot 0.3.4__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

{moscot-0.3.4 → moscot-0.4.0}/.gitignore RENAMED Viewed

@@ -154,3 +154,4 @@ packages.dot
 # plotting tests
 tests/plotting/actual_figures/
+tests/plotting/figures/

{moscot-0.3.4 → moscot-0.4.0}/.pre-commit-config.yaml RENAMED Viewed

@@ -2,18 +2,18 @@ fail_fast: false
 default_language_version:
   python: python3
 default_stages:
-  - commit
-  - push
+  - pre-commit
+  - pre-push
 minimum_pre_commit_version: 3.0.0
 repos:
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.8.0
+    rev: v1.13.0
     hooks:
       - id: mypy
         additional_dependencies: [numpy>=1.25.0]
         files: ^src
   - repo: https://github.com/psf/black
-    rev: 24.2.0
+    rev: 24.10.0
     hooks:
       - id: black
         additional_dependencies: [toml]
@@ -29,7 +29,7 @@ repos:
         additional_dependencies: [toml]
         args: [--order-by-type]
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: check-merge-conflict
       - id: check-ast
@@ -42,28 +42,28 @@ repos:
       - id: check-yaml
       - id: check-toml
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.1
+    rev: v3.19.0
     hooks:
       - id: pyupgrade
         args: [--py3-plus, --py38-plus, --keep-runtime-typing]
   - repo: https://github.com/asottile/blacken-docs
-    rev: 1.16.0
+    rev: 1.19.1
     hooks:
       - id: blacken-docs
         additional_dependencies: [black==23.1.0]
   - repo: https://github.com/rstcheck/rstcheck
-    rev: v6.2.0
+    rev: v6.2.4
     hooks:
       - id: rstcheck
         additional_dependencies: [tomli]
         args: [--config=pyproject.toml]
   - repo: https://github.com/PyCQA/doc8
-    rev: v1.1.1
+    rev: v1.1.2
     hooks:
       - id: doc8
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.2.2
+    rev: v0.7.2
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]

moscot-0.4.0/.run_notebooks.sh ADDED Viewed

@@ -0,0 +1,62 @@
+#!/bin/bash
+# Execute every example notebook found below a base directory with jupytext,
+# using the `moscot` Jupyter kernel. Exits with status 1 on wrong usage, when no
+# notebook is found, or when at least one notebook fails to execute.
+# Check if the base directory is provided as an argument
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <base_notebook_directory>"
+    exit 1
+fi
+# Base directory for notebooks
+base_dir=$1
+# Directories that are searched for `*.ipynb` files
+declare -a notebook_dirs=(
+    "$base_dir/examples/plotting"
+    "$base_dir/examples/problems"
+    "$base_dir/examples/solvers"
+)
+# Initialize an array to hold valid notebook paths
+declare -a valid_notebooks=()
+# Gather all valid notebook files from the directories
+echo "Gathering notebooks..."
+for dir in "${notebook_dirs[@]}"; do
+    # Keep the directory quoted and only the glob unquoted, so that paths
+    # containing whitespace are not word-split before the expansion.
+    for nb in "$dir"/*.ipynb; do
+        # An unmatched glob stays literal; the file test filters it out
+        if [[ -f "$nb" ]]; then
+            valid_notebooks+=("$nb")  # Add to the list of valid notebooks
+        fi
+    done
+done
+# Check if we have any notebooks to run
+if [ ${#valid_notebooks[@]} -eq 0 ]; then
+    echo "No notebooks found to run."
+    exit 1
+fi
+# Echo the notebooks that will be run for clarity
+echo "Preparing to run the following notebooks:"
+for nb in "${valid_notebooks[@]}"; do
+    echo "$nb"
+done
+# Initialize a flag to track the success of all commands
+all_success=true
+# Execute all valid notebooks; keep going after a failure so that every
+# broken notebook is reported in a single run
+for nb in "${valid_notebooks[@]}"; do
+    echo "Running $nb"
+    jupytext -k moscot --execute "$nb" || {
+        echo "Failed to run $nb"
+        all_success=false
+    }
+done
+# Check if any executions failed
+if [ "$all_success" = false ]; then
+    echo "One or more notebooks failed to execute."
+    exit 1
+fi
+echo "All notebooks executed successfully."

{moscot-0.3.4 → moscot-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: moscot
-Version: 0.3.4
+Version: 0.4.0
 Summary: Multi-omic single-cell optimal transport tools
 Author: Dominik Klein, Giovanni Palla, Michal Klein, Zoe Piran, Marius Lange
 Maintainer-email: Dominik Klein <dominik.klein@helmholtz-muenchen.de>, Giovanni Palla <giovanni.palla@helmholtz-muenchen.de>, Michal Klein <michal.klein@helmholtz-muenchen.de>
@@ -49,28 +49,33 @@ Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Classifier: Typing :: Typed
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
 Classifier: Topic :: Scientific/Engineering :: Mathematics
-Requires-Python: >=3.8
+Requires-Python: >=3.10
 Description-Content-Type: text/x-rst
 License-File: LICENSE
 Requires-Dist: numpy>=1.20.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: pandas>=2.0.1
-Requires-Dist: networkx>=2.6.3
+Requires-Dist: networkx>=3.2
 Requires-Dist: matplotlib>=3.5.0
 Requires-Dist: anndata>=0.9.1
 Requires-Dist: scanpy>=1.9.3
 Requires-Dist: wrapt>=1.13.2
 Requires-Dist: docrep>=0.3.2
-Requires-Dist: ott-jax>=0.4.5
+Requires-Dist: ott-jax[neural]>=0.5.0
 Requires-Dist: cloudpickle>=2.2.0
 Requires-Dist: rich>=13.5
+Requires-Dist: docstring_inheritance>=2.0.0
+Requires-Dist: mudata>=0.2.2
 Provides-Extra: spatial
 Requires-Dist: squidpy>=1.2.3; extra == "spatial"
+Provides-Extra: neural
+Requires-Dist: optax; extra == "neural"
+Requires-Dist: flax; extra == "neural"
+Requires-Dist: diffrax; extra == "neural"
 Provides-Extra: dev
 Requires-Dist: pre-commit>=3.0.0; extra == "dev"
 Requires-Dist: tox>=4; extra == "dev"

{moscot-0.3.4 → moscot-0.4.0}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ name = "moscot"
 dynamic = ["version"]
 description = "Multi-omic single-cell optimal transport tools"
 readme = "README.rst"
-requires-python = ">=3.8"
+requires-python = ">=3.10"
 license = {file = "LICENSE"}
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -19,7 +19,6 @@ classifiers = [
     "Operating System :: Microsoft :: Windows",
     "Typing :: Typed",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
@@ -48,22 +47,31 @@ dependencies = [
     "numpy>=1.20.0",
     "scipy>=1.7.0",
     "pandas>=2.0.1",
-    "networkx>=2.6.3",
+    "networkx>=3.2",
     # https://github.com/scverse/scanpy/issues/2411
     "matplotlib>=3.5.0",
     "anndata>=0.9.1",
     "scanpy>=1.9.3",
     "wrapt>=1.13.2",
     "docrep>=0.3.2",
-    "ott-jax>=0.4.5",
+    "ott-jax[neural]>=0.5.0",
     "cloudpickle>=2.2.0",
     "rich>=13.5",
+    "docstring_inheritance>=2.0.0",
+    "mudata>=0.2.2"
 ]
 [project.optional-dependencies]
 spatial = [
     "squidpy>=1.2.3"
 ]
+neural = [
+    "optax",
+    "flax",
+    "diffrax",
+]
 dev = [
     "pre-commit>=3.0.0",
     "tox>=4",
@@ -225,7 +233,7 @@ ignore_roles = [
 [tool.mypy]
 mypy_path = "$MYPY_CONFIG_FILE_DIR/src"
-python_version = "3.9"
+python_version = "3.10"
 plugins = "numpy.typing.mypy_plugin"
 ignore_errors = false
@@ -262,16 +270,16 @@ max_line_length = 120
 legacy_tox_ini = """
 [tox]
 min_version = 4.0
-env_list = lint-code,py{3.8,3.9,3.10,3.11}
+env_list = lint-code,py{3.10,3.11,3.12}
 skip_missing_interpreters = true
 [testenv]
 extras = test
-pass_env = PYTEST_*,CI
 commands =
     python -m pytest {tty:--color=yes} {posargs: \
         --cov={env_site_packages_dir}{/}moscot --cov-config={tox_root}{/}pyproject.toml \
         --no-cov-on-fail --cov-report=xml --cov-report=term-missing:skip-covered}
+passenv = PYTEST_*,CI
 [testenv:lint-code]
 description = Lint the code.
@@ -282,7 +290,6 @@ commands =
 [testenv:lint-docs]
 description = Lint the documentation.
-deps =
 extras = docs
 ignore_errors = true
 allowlist_externals = make
@@ -294,6 +301,21 @@ commands =
     # TODO(michalk8): uncomment after https://github.com/theislab/moscot/issues/490
     # make spelling {posargs}
+[testenv:examples-docs]
+allowlist_externals = bash
+description = Run the notebooks.
+use_develop = true
+deps =
+    ipykernel
+    jupytext
+    nbconvert
+    leidenalg
+extras = docs
+changedir = {tox_root}{/}docs
+commands =
+    python -m ipykernel install --user --name=moscot
+    bash {tox_root}/.run_notebooks.sh {tox_root}{/}docs/notebooks
 [testenv:clean-docs]
 description = Remove the documentation.
 deps =

moscot-0.4.0/src/moscot/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from importlib import metadata
+from moscot import backends, base, costs, datasets, plotting, problems, utils
+try:
+    md = metadata.metadata(__name__)
+    __version__ = md.get("version", "")  # type: ignore[attr-defined]
+    __author__ = md.get("Author", "")  # type: ignore[attr-defined]
+    __maintainer__ = md.get("Maintainer-email", "")  # type: ignore[attr-defined]
+except ImportError:
+    md = None
+del metadata, md

{moscot-0.3.4 → moscot-0.4.0}/src/moscot/_types.py RENAMED Viewed

@@ -2,6 +2,9 @@ import os
 from typing import Any, Literal, Mapping, Optional, Sequence, Union
 import numpy as np
+from ott.initializers.linear.initializers import SinkhornInitializer
+from ott.initializers.linear.initializers_lr import LRInitializer
+from ott.initializers.quadratic.initializers import BaseQuadraticInitializer
 # TODO(michalk8): polish
@@ -17,13 +20,14 @@ ProblemKind_t = Literal["linear", "quadratic", "unknown"]
 Numeric_t = Union[int, float]  # type of `time_key` arguments
 Filter_t = Optional[Union[str, Mapping[str, Sequence[Any]]]]  # type how to filter adata
 Str_Dict_t = Optional[Union[str, Mapping[str, Sequence[Any]]]]  # type for `cell_transition`
-SinkFullRankInit = Literal["default", "gaussian", "sorting"]
-LRInitializer_t = Literal["random", "rank2", "k-means", "generalized-k-means"]
+SinkhornInitializerTag_t = Literal["default", "gaussian", "sorting"]
+LRInitializerTag_t = Literal["random", "rank2", "k-means", "generalized-k-means"]
-SinkhornInitializer_t = Optional[Union[SinkFullRankInit, LRInitializer_t]]
-QuadInitializer_t = Optional[LRInitializer_t]
+LRInitializer_t = Optional[Union[LRInitializer, LRInitializerTag_t]]
+SinkhornInitializer_t = Optional[Union[SinkhornInitializer, SinkhornInitializerTag_t]]
+QuadInitializer_t = Optional[Union[BaseQuadraticInitializer]]
-Initializer_t = Union[SinkhornInitializer_t, LRInitializer_t]
+Initializer_t = Union[SinkhornInitializer_t, QuadInitializer_t, LRInitializer_t]
 ProblemStage_t = Literal["prepared", "solved"]
 Device_t = Union[Literal["cpu", "gpu", "tpu"], str]
@@ -36,10 +40,6 @@ OttCostFn_t = Literal[
     "pnorm_p",
     "sq_pnorm",
     "cosine",
-    "elastic_l1",
-    "elastic_l2",
-    "elastic_stvs",
-    "elastic_sqk_overlap",
     "geodesic",
 ]
 OttCostFnMap_t = Union[OttCostFn_t, Mapping[Literal["xy", "x", "y"], OttCostFn_t]]

moscot-0.4.0/src/moscot/backends/ott/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+from ott.geometry import costs
+from moscot.backends.ott._utils import sinkhorn_divergence
+from moscot.backends.ott.output import GraphOTTOutput, OTTNeuralOutput, OTTOutput
+from moscot.backends.ott.solver import GENOTLinSolver, GWSolver, SinkhornSolver
+from moscot.costs import register_cost
+__all__ = ["OTTOutput", "GWSolver", "SinkhornSolver", "OTTNeuralOutput", "sinkhorn_divergence", "GENOTLinSolver"]
+register_cost("euclidean", backend="ott")(costs.Euclidean)
+register_cost("sq_euclidean", backend="ott")(costs.SqEuclidean)
+register_cost("cosine", backend="ott")(costs.Cosine)
+register_cost("pnorm_p", backend="ott")(costs.PNormP)
+register_cost("sq_pnorm", backend="ott")(costs.SqPNorm)

moscot-0.4.0/src/moscot/backends/ott/_utils.py ADDED Viewed

@@ -0,0 +1,331 @@
+from collections import defaultdict
+from functools import partial
+from typing import Any, Dict, Iterable, Literal, Optional, Tuple, Union
+import jax
+import jax.experimental.sparse as jesp
+import jax.numpy as jnp
+import numpy as np
+import scipy.sparse as sp
+from ott.geometry import epsilon_scheduler, geodesic, geometry, pointcloud
+from ott.initializers.linear import initializers as init_lib
+from ott.initializers.linear import initializers_lr as lr_init_lib
+from ott.neural import datasets
+from ott.solvers import utils as solver_utils
+from ott.tools.sinkhorn_divergence import sinkhorn_divergence as sinkhorn_div
+from moscot._logging import logger
+from moscot._types import ArrayLike, ScaleCost_t
+Scale_t = Union[float, Literal["mean", "median", "max_cost", "max_norm", "max_bound"]]
+__all__ = ["sinkhorn_divergence"]
+class InitializerResolver:
+    """Class for creating various OT solver initializers.
+    This class provides static methods to create and manage different types of
+    initializers used in optimal transport solvers, including low-rank, k-means,
+    and standard Sinkhorn initializers.
+    """
+    @staticmethod
+    def lr_from_str(
+        initializer: str,
+        rank: int,
+        **kwargs: Any,
+    ) -> lr_init_lib.LRInitializer:
+        """Create a low-rank initializer from a string specification.
+        Parameters
+        ----------
+        initializer : str
+            Either existing initializer instance or string specifier.
+        rank : int
+            Rank for the initialization.
+        **kwargs : Any
+            Additional keyword arguments for initializer creation.
+        Returns
+        -------
+        LRInitializer
+            Configured low-rank initializer.
+        Raises
+        ------
+        NotImplementedError
+            If requested initializer type is not implemented.
+        """
+        if isinstance(initializer, lr_init_lib.LRInitializer):
+            return initializer
+        if initializer == "k-means":
+            return lr_init_lib.KMeansInitializer(rank=rank, **kwargs)
+        if initializer == "generalized-k-means":
+            return lr_init_lib.GeneralizedKMeansInitializer(rank=rank, **kwargs)
+        if initializer == "random":
+            return lr_init_lib.RandomInitializer(rank=rank, **kwargs)
+        if initializer == "rank2":
+            return lr_init_lib.Rank2Initializer(rank=rank, **kwargs)
+        raise NotImplementedError(f"Initializer `{initializer}` is not implemented.")
+    @staticmethod
+    def from_str(
+        initializer: str,
+        **kwargs: Any,
+    ) -> init_lib.SinkhornInitializer:
+        """Create a Sinkhorn initializer from a string specification.
+        Parameters
+        ----------
+        initializer : str
+            String specifier for initializer type.
+        **kwargs : Any
+            Additional keyword arguments for initializer creation.
+        Returns
+        -------
+        SinkhornInitializer
+            Configured Sinkhorn initializer.
+        Raises
+        ------
+        NotImplementedError
+            If requested initializer type is not implemented.
+        """
+        if isinstance(initializer, init_lib.SinkhornInitializer):
+            return initializer
+        if initializer == "default":
+            return init_lib.DefaultInitializer(**kwargs)
+        if initializer == "gaussian":
+            return init_lib.GaussianInitializer(**kwargs)
+        if initializer == "sorting":
+            return init_lib.SortingInitializer(**kwargs)
+        if initializer == "subsample":
+            return init_lib.SubsampleInitializer(**kwargs)
+        raise NotImplementedError(f"Initializer `{initializer}` is not yet implemented.")
+def sinkhorn_divergence(
+    point_cloud_1: ArrayLike,
+    point_cloud_2: ArrayLike,
+    a: Optional[ArrayLike] = None,
+    b: Optional[ArrayLike] = None,
+    epsilon: Union[float, epsilon_scheduler.Epsilon] = 1e-1,
+    tau_a: float = 1.0,
+    tau_b: float = 1.0,
+    scale_cost: ScaleCost_t = 1.0,
+    batch_size: Optional[int] = None,
+    **kwargs: Any,
+) -> float:
+    point_cloud_1 = jnp.asarray(point_cloud_1)
+    point_cloud_2 = jnp.asarray(point_cloud_2)
+    a = None if a is None else jnp.asarray(a)
+    b = None if b is None else jnp.asarray(b)
+    output = sinkhorn_div(
+        pointcloud.PointCloud,
+        x=point_cloud_1,
+        y=point_cloud_2,
+        batch_size=batch_size,
+        a=a,
+        b=b,
+        scale_cost=scale_cost,
+        epsilon=epsilon,
+        solve_kwargs={
+            "tau_a": tau_a,
+            "tau_b": tau_b,
+        },
+        **kwargs,
+    )[1]
+    xy_conv, xx_conv, *yy_conv = output.converged
+    if not xy_conv:
+        logger.warning("Solver did not converge in the `x/y` term.")
+    if not xx_conv:
+        logger.warning("Solver did not converge in the `x/x` term.")
+    if len(yy_conv) and not yy_conv[0]:
+        logger.warning("Solver did not converge in the `y/y` term.")
+    return float(output.divergence)
+@partial(jax.jit, static_argnames=["k"])
+def get_nearest_neighbors(
+    input_batch: jnp.ndarray,
+    target: jnp.ndarray,
+    k: int = 30,
+    recall_target: float = 0.95,
+    aggregate_to_topk: bool = True,
+) -> Tuple[jnp.ndarray, jnp.ndarray]:
+    """Get the k nearest neighbors of the input batch in the target."""
+    if target.shape[0] < k:
+        raise ValueError(f"k is {k}, but must be smaller or equal than {target.shape[0]}.")
+    pairwise_euclidean_distances = pointcloud.PointCloud(input_batch, target).cost_matrix
+    return jax.lax.approx_min_k(
+        pairwise_euclidean_distances, k=k, recall_target=recall_target, aggregate_to_topk=aggregate_to_topk
+    )
+def check_shapes(geom_x: geometry.Geometry, geom_y: geometry.Geometry, geom_xy: geometry.Geometry) -> None:
+    n, m = geom_xy.shape
+    n_, m_ = geom_x.shape[0], geom_y.shape[0]
+    if n != n_:
+        raise ValueError(f"Expected the first geometry to have `{n}` points, found `{n_}`.")
+    if m != m_:
+        raise ValueError(f"Expected the second geometry to have `{m}` points, found `{m_}`.")
+def alpha_to_fused_penalty(alpha: float) -> float:
+    """Convert."""
+    if not (0 < alpha <= 1):
+        raise ValueError(f"Expected `alpha` to be in interval `(0, 1]`, found `{alpha}`.")
+    return (1 - alpha) / alpha
+def densify(arr: ArrayLike) -> jax.Array:
+    """If the input is sparse, convert it to dense.
+    Parameters
+    ----------
+    arr
+        Array to check.
+    Returns
+    -------
+    dense :mod:`jax` array.
+    """
+    if sp.issparse(arr):
+        arr = arr.toarray()  # type: ignore[attr-defined]
+    elif isinstance(arr, jesp.BCOO):
+        arr = arr.todense()
+    return jnp.asarray(arr)
+def ensure_2d(arr: ArrayLike, *, reshape: bool = False) -> jax.Array:
+    """Ensure that an array is 2-dimensional.
+    Parameters
+    ----------
+    arr
+        Array to check.
+    reshape
+        Allow reshaping 1-dimensional array to ``[n, 1]``.
+    Returns
+    -------
+    2-dimensional :mod:`jax` array.
+    """
+    if reshape and arr.ndim == 1:
+        return jnp.reshape(arr, (-1, 1))
+    if arr.ndim != 2:
+        raise ValueError(f"Expected array to have 2 dimensions, found `{arr.ndim}`.")
+    return arr.astype(jnp.float64)
+def convert_scipy_sparse(arr: Union[sp.spmatrix, jesp.BCOO]) -> jesp.BCOO:
+    """If the input is a scipy sparse matrix, convert it to a jax BCOO."""
+    if sp.issparse(arr):
+        return jesp.BCOO.from_scipy_sparse(arr)
+    return arr
+def _instantiate_geodesic_cost(
+    arr: jax.Array,
+    problem_shape: Tuple[int, int],
+    t: Optional[float],
+    is_linear_term: bool,
+    epsilon: Union[float, epsilon_scheduler.Epsilon] = None,
+    relative_epsilon: Optional[bool] = None,
+    scale_cost: Scale_t = 1.0,
+    directed: bool = True,
+    **kwargs: Any,
+) -> geometry.Geometry:
+    n_src, n_tgt = problem_shape
+    if is_linear_term and n_src + n_tgt != arr.shape[0]:
+        raise ValueError(f"Expected `x` to have `{n_src + n_tgt}` points, found `{arr.shape[0]}`.")
+    t = epsilon / 4.0 if t is None else t
+    cm_full = geodesic.Geodesic.from_graph(arr, t=t, directed=directed, **kwargs).cost_matrix
+    cm = cm_full[:n_src, n_src:] if is_linear_term else cm_full
+    return geometry.Geometry(cm, epsilon=epsilon, relative_epsilon=relative_epsilon, scale_cost=scale_cost)
+def data_match_fn(
+    src_lin: Optional[jnp.ndarray] = None,
+    tgt_lin: Optional[jnp.ndarray] = None,
+    src_quad: Optional[jnp.ndarray] = None,
+    tgt_quad: Optional[jnp.ndarray] = None,
+    *,
+    typ: Literal["lin", "quad", "fused"],
+    **data_match_fn_kwargs,
+) -> jnp.ndarray:
+    if typ == "lin":
+        return solver_utils.match_linear(x=src_lin, y=tgt_lin, **data_match_fn_kwargs)
+    if typ == "quad":
+        return solver_utils.match_quadratic(xx=src_quad, yy=tgt_quad, **data_match_fn_kwargs)
+    if typ == "fused":
+        return solver_utils.match_quadratic(xx=src_quad, yy=tgt_quad, x=src_lin, y=tgt_lin, **data_match_fn_kwargs)
+    raise NotImplementedError(f"Unknown type: {typ}.")
+class Loader:
+    def __init__(self, dataset: datasets.OTDataset, batch_size: int, seed: Optional[int] = None):
+        self.dataset = dataset
+        self.batch_size = batch_size
+        self._rng = np.random.default_rng(seed)
+    def __iter__(self):
+        return self
+    def __next__(self) -> Dict[str, jnp.ndarray]:
+        data = defaultdict(list)
+        for _ in range(self.batch_size):
+            ix = self._rng.integers(0, len(self.dataset))
+            for k, v in self.dataset[ix].items():
+                data[k].append(v)
+        return {k: jnp.vstack(v) for k, v in data.items()}
+    def __len__(self):
+        return len(self.dataset)
+class MultiLoader:
+    """Dataset for OT problems with conditions.
+    This data loader wraps several data loaders and samples from them.
+    Args:
+      datasets: Datasets to sample from.
+      seed: Random seed.
+    """
+    def __init__(
+        self,
+        datasets: Iterable[Loader],
+        seed: Optional[int] = None,
+    ):
+        self.datasets = tuple(datasets)
+        self._rng = np.random.default_rng(seed)
+        self._iterators: list[MultiLoader] = []
+        self._it = 0
+    def __next__(self) -> Dict[str, jnp.ndarray]:
+        self._it += 1
+        ix = self._rng.choice(len(self._iterators))
+        iterator = self._iterators[ix]
+        if self._it < len(self):
+            return next(iterator)
+        # reset the consumed iterator and return it's first element
+        self._iterators[ix] = iterator = iter(self.datasets[ix])
+        return next(iterator)
+    def __iter__(self) -> "MultiLoader":
+        self._it = 0
+        self._iterators = [iter(ds) for ds in self.datasets]
+        return self
+    def __len__(self) -> int:
+        return max((len(ds) for ds in self.datasets), default=0)

moscot 0.3.4__tar.gz → 0.4.0__tar.gz

moscot 0.3.4__tar.gz → 0.4.0__tar.gz