olmoearth-pretrain-minimal 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- olmoearth_pretrain_minimal/__init__.py +16 -0
- olmoearth_pretrain_minimal/model_loader.py +123 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/__init__.py +6 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/__init__.py +1 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/attention.py +559 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/encodings.py +115 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/flexi_patch_embed.py +304 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/flexi_vit.py +2219 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/latent_mim.py +166 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/tokenization.py +194 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/utils.py +83 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/olmoearth_pretrain_v1.py +152 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/__init__.py +2 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/config.py +264 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/constants.py +519 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/datatypes.py +165 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/decorators.py +75 -0
- olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/types.py +8 -0
- olmoearth_pretrain_minimal/test.py +51 -0
- olmoearth_pretrain_minimal-0.0.1.dist-info/METADATA +326 -0
- olmoearth_pretrain_minimal-0.0.1.dist-info/RECORD +24 -0
- olmoearth_pretrain_minimal-0.0.1.dist-info/WHEEL +5 -0
- olmoearth_pretrain_minimal-0.0.1.dist-info/licenses/LICENSE +204 -0
- olmoearth_pretrain_minimal-0.0.1.dist-info/top_level.txt +1 -0
olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/latent_mim.py
@@ -0,0 +1,166 @@
"""Simple setup of the latent predictor."""

import logging
from copy import deepcopy
from dataclasses import dataclass
from typing import Any

import torch
import torch.nn as nn
from torch.distributed import DeviceMesh
from torch.distributed.fsdp import (
    MixedPrecisionPolicy,
    fully_shard,
    register_fsdp_forward_method,
)

from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.config import Config
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.datatypes import MaskedOlmoEarthSample
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.flexi_vit import TokensAndMasks
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.utils import DistributedMixins, unpack_encoder_output

logger = logging.getLogger(__name__)


class LatentMIM(nn.Module, DistributedMixins):
    """Latent MIM Style."""

    supports_multiple_modalities_at_once = True

    def __init__(
        self,
        encoder: nn.Module,
        decoder: nn.Module,
        reconstructor: torch.nn.Module | None = None,
    ):
        """Initialize the Latent MIM Style.

        Args:
            encoder: The encoder to use.
            decoder: The decoder to use.
            reconstructor: Optional reconstructor for auto-encoding.
        """
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.reconstructor = reconstructor
        self.target_encoder = deepcopy(self.encoder)
        for p in self.target_encoder.parameters():
            p.requires_grad = False

    def forward(
        self, x: MaskedOlmoEarthSample, patch_size: int
    ) -> tuple[
        TokensAndMasks,
        TokensAndMasks,
        torch.Tensor,
        TokensAndMasks | None,
        dict[str, Any],
    ]:
        """Forward pass for the Latent MIM Style.

        Returns:
            latent: embeddings from the encoder
            decoded: predictions from the decoder for masked tokens
            latent_projected_and_pooled: pooled tokens for the contrastive loss
            reconstructed: MAE predictions if a reconstructor is enabled
            extra_metrics: auxiliary metrics (e.g., token norm statistics)
        """
        # TODO: Inputs and outputs here are not consistent between encoder and decoder; need a TokensAndMasks++
        output_dict = self.encoder(x, patch_size=patch_size)
        token_norm_stats = output_dict.pop("token_norm_stats", None)
        latent, latent_projected_and_pooled, decoder_kwargs = unpack_encoder_output(
            output_dict
        )
        extra_metrics = {}
        if token_norm_stats is not None:
            extra_metrics["token_norm_stats"] = token_norm_stats
        reconstructed = None
        if self.reconstructor:
            reconstructed = self.reconstructor(latent, x.timestamps, patch_size)
        decoded = self.decoder(
            latent, timestamps=x.timestamps, patch_size=patch_size, **decoder_kwargs
        )
        return (
            latent,
            decoded,
            latent_projected_and_pooled,
            reconstructed,
            extra_metrics,
        )

    def apply_fsdp(
        self,
        dp_mesh: DeviceMesh | None = None,
        param_dtype: torch.dtype | None = None,
        reduce_dtype: torch.dtype = torch.float32,
        prefetch_factor: int = 0,
    ) -> None:
        """Apply FSDP to the model."""
        mp_policy = MixedPrecisionPolicy(
            param_dtype=param_dtype, reduce_dtype=reduce_dtype
        )
        fsdp_config = dict(mesh=dp_mesh, mp_policy=mp_policy)

        self.encoder.apply_fsdp(**fsdp_config)
        self.decoder.apply_fsdp(**fsdp_config)
        self.target_encoder.apply_fsdp(**fsdp_config)
        if self.reconstructor:
            self.reconstructor.apply_fsdp(**fsdp_config)
        # TODO: More fine-grained wrapping of the encoder transformer layers next time
        fully_shard(self, **fsdp_config)
        register_fsdp_forward_method(self.target_encoder, "forward")

    def apply_compile(self) -> None:
        """Apply torch.compile to the model."""
        logger.info("Applying torch.compile to the model")
        self.encoder.apply_compile()
        logger.info("Applied torch.compile to the encoder")
        self.decoder.apply_compile()
        logger.info("Applied torch.compile to the decoder")
        self.target_encoder.apply_compile()
        logger.info("Applied torch.compile to the target encoder")


@dataclass
class LatentMIMConfig(Config):
    """Configuration for the Latent Predictor."""

    encoder_config: Config
    decoder_config: Config
    reconstructor_config: Config | None = None

    def validate(self) -> None:
        """Validate the configuration."""
        if (
            self.encoder_config.supported_modalities
            != self.decoder_config.supported_modalities
        ):
            raise ValueError("Encoder and decoder must support the same modalities")
        if (
            self.encoder_config.max_sequence_length
            != self.decoder_config.max_sequence_length
        ):
            raise ValueError(
                "Encoder and decoder must have the same max sequence length"
            )
        if (
            self.encoder_config.embedding_size
            != self.decoder_config.encoder_embedding_size
        ):
            raise ValueError("Encoder embedding size must be consistent!")

    def build(self) -> "LatentMIM":
        """Build the Latent Predictor."""
        self.validate()
        encoder = self.encoder_config.build()
        decoder = self.decoder_config.build()
        reconstructor = (
            self.reconstructor_config.build()
            if self.reconstructor_config is not None
            else None
        )
        return LatentMIM(
            encoder=encoder,
            decoder=decoder,
            reconstructor=reconstructor,
        )
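Taken together, LatentMIMConfig wires an online encoder, a frozen deep copy of it as target encoder, and a decoder into one module. Below is a minimal sketch of that wiring, reusing the EncoderConfig/PredictorConfig arguments that appear in olmoearth_pretrain_v1.py later in this diff; it is an illustration, not part of the package, and the forward call is shown as a comment because constructing a real MaskedOlmoEarthSample is out of scope here.

# Minimal sketch: build a LatentMIM from configs.
# Config arguments mirror those used in olmoearth_pretrain_v1.py below.
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.flexi_vit import EncoderConfig, PredictorConfig
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.latent_mim import LatentMIMConfig
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.constants import Modality

modalities = [Modality.SENTINEL2_L2A.name]
config = LatentMIMConfig(
    encoder_config=EncoderConfig(
        embedding_size=128,
        num_heads=8,
        depth=4,
        mlp_ratio=4.0,
        supported_modality_names=modalities,
        max_patch_size=8,
        drop_path=0.1,
        max_sequence_length=12,
    ),
    decoder_config=PredictorConfig(
        encoder_embedding_size=128,
        decoder_embedding_size=128,
        depth=4,
        mlp_ratio=4.0,
        num_heads=8,
        supported_modality_names=modalities,
        max_sequence_length=12,
    ),
)
model = config.build()  # runs validate() first, then builds encoder and decoder

# A forward pass takes a MaskedOlmoEarthSample plus a patch size and returns
# a 5-tuple (see the forward docstring above):
#   latent, decoded, pooled, reconstructed, extras = model(sample, patch_size=4)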
olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/tokenization.py
@@ -0,0 +1,194 @@
"""Tokenization configuration for custom band grouping strategies.

This module allows customizing how bands are grouped into tokens for each modality,
enabling experiments with different tokenization strategies (e.g., per-band tokens,
spectral groupings, etc.).

Example:
    >>> from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.tokenization import TokenizationConfig, ModalityTokenization
    >>> from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.constants import Modality
    >>>
    >>> # Create config with per-band tokenization for Sentinel-2
    >>> s2_bands = Modality.SENTINEL2_L2A.band_order
    >>> config = TokenizationConfig(
    ...     overrides={
    ...         Modality.SENTINEL2_L2A.name: ModalityTokenization(
    ...             band_groups=[[b] for b in s2_bands]
    ...         )
    ...     }
    ... )
    >>>
    >>> # Use default tokenization for other modalities
    >>> num_bandsets = config.get_num_bandsets(Modality.SENTINEL1.name)
"""

from dataclasses import dataclass, field

from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.constants import Modality, ModalitySpec


@dataclass
class ModalityTokenization:
    """Custom tokenization configuration for a single modality.

    Specifies how bands should be grouped into tokens. Each band group
    becomes a separate token.

    Args:
        band_groups: List of band groups, where each group is a list of band names.
    """

    band_groups: list[list[str]]

    def compute_indices(self, base_modality: ModalitySpec) -> list[list[int]]:
        """Map band names to indices based on the base modality's band order.

        Args:
            base_modality: The ModalitySpec that defines the canonical band order.

        Returns:
            List of index lists, one per band group.

        Raises:
            ValueError: If a band name doesn't exist in the modality's band_order.
        """
        name_to_idx = {name: i for i, name in enumerate(base_modality.band_order)}
        result = []
        for group in self.band_groups:
            group_indices = []
            for band in group:
                if band not in name_to_idx:
                    raise ValueError(
                        f"Band '{band}' not found in modality '{base_modality.name}'. "
                        f"Valid bands: {list(base_modality.band_order)}"
                    )
                group_indices.append(name_to_idx[band])
            result.append(group_indices)
        return result

    def get_num_bands_per_group(self) -> list[int]:
        """Get the number of bands in each group."""
        return [len(group) for group in self.band_groups]

    @property
    def num_band_sets(self) -> int:
        """Get the number of band sets (token groups)."""
        return len(self.band_groups)

    def validate(self, base_modality: ModalitySpec) -> None:
        """Validate that all band names exist in the modality.

        Args:
            base_modality: The ModalitySpec to validate against.

        Raises:
            ValueError: If a band name doesn't exist in the modality's band_order.
        """
        valid_bands = set(base_modality.band_order)
        for group in self.band_groups:
            for band in group:
                if band not in valid_bands:
                    raise ValueError(
                        f"Band '{band}' not found in modality '{base_modality.name}'. "
                        f"Valid bands: {valid_bands}"
                    )


@dataclass
class TokenizationConfig:
    """Configuration for custom tokenization strategies.

    Allows overriding the default bandset groupings for specific modalities.
    Modalities without overrides use their default bandset configuration
    from ModalitySpec.
    """

    overrides: dict[str, ModalityTokenization] = field(default_factory=dict)
    _bandset_indices_cache: dict[str, list[list[int]]] = field(
        default_factory=dict, init=False, repr=False
    )

    def get_bandset_indices(self, modality_name: str) -> list[list[int]]:
        """Get band indices for tokenization, using the override or the default.

        Args:
            modality_name: Name of the modality.

        Returns:
            List of index lists, one per bandset/token group.

        Raises:
            ValueError: If modality_name is invalid or band names don't exist.
        """
        # Check the cache first
        if modality_name in self._bandset_indices_cache:
            return self._bandset_indices_cache[modality_name]

        try:
            base_spec = Modality.get(modality_name)
        except (AttributeError, AssertionError) as e:
            raise ValueError(f"Invalid modality: {modality_name}") from e

        if modality_name in self.overrides:
            result = self.overrides[modality_name].compute_indices(base_spec)
        else:
            result = base_spec.bandsets_as_indices()

        # Cache the result
        self._bandset_indices_cache[modality_name] = result
        return result

    def get_num_bandsets(self, modality_name: str) -> int:
        """Get the number of bandsets (tokens per spatial location).

        Args:
            modality_name: Name of the modality.

        Returns:
            Number of bandsets.

        Raises:
            ValueError: If modality_name is invalid.
        """
        if modality_name in self.overrides:
            return self.overrides[modality_name].num_band_sets
        try:
            return Modality.get(modality_name).num_band_sets
        except (AttributeError, AssertionError) as e:
            raise ValueError(f"Invalid modality: {modality_name}") from e

    def get_num_bands_per_bandset(self, modality_name: str) -> list[int]:
        """Get the number of bands in each bandset.

        Args:
            modality_name: Name of the modality.

        Returns:
            List of band counts, one per bandset.

        Raises:
            ValueError: If modality_name is invalid.
        """
        if modality_name in self.overrides:
            return self.overrides[modality_name].get_num_bands_per_group()
        try:
            base_spec = Modality.get(modality_name)
        except (AttributeError, AssertionError) as e:
            raise ValueError(f"Invalid modality: {modality_name}") from e
        return [len(bs.bands) for bs in base_spec.band_sets]

    def validate(self) -> None:
        """Validate all overrides against their modalities.

        Raises:
            ValueError: If any modality name or band name is invalid.
        """
        for modality_name, tokenization in self.overrides.items():
            try:
                base_spec = Modality.get(modality_name)
            except (AttributeError, AssertionError) as e:
                raise ValueError(
                    f"Invalid modality name in overrides: '{modality_name}'. "
                    f"Valid modalities: {Modality.names()}"
                ) from e
            tokenization.validate(base_spec)
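As a concrete illustration of the override mechanism, here is a short sketch (not from the package) that splits Sentinel-2 into two custom token groups. To stay self-contained it slices the canonical band order rather than naming bands, since the actual band names are defined by Modality.SENTINEL2_L2A.band_order in constants.py.

# Sketch: override Sentinel-2 tokenization by splitting its canonical band
# order into two arbitrary groups (first three bands vs. the rest), so each
# spatial location yields two tokens instead of the default bandsets.
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.tokenization import (
    ModalityTokenization,
    TokenizationConfig,
)
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.constants import Modality

bands = list(Modality.SENTINEL2_L2A.band_order)
config = TokenizationConfig(
    overrides={
        Modality.SENTINEL2_L2A.name: ModalityTokenization(
            band_groups=[bands[:3], bands[3:]]
        )
    }
)
config.validate()  # raises ValueError on unknown modality or band names

assert config.get_num_bandsets(Modality.SENTINEL2_L2A.name) == 2
assert config.get_num_bands_per_bandset(Modality.SENTINEL2_L2A.name) == [3, len(bands) - 3]
# Non-overridden modalities fall back to their ModalitySpec defaults:
default_indices = config.get_bandset_indices(Modality.SENTINEL1.name)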
olmoearth_pretrain_minimal/olmoearth_pretrain_v1/nn/utils.py
@@ -0,0 +1,83 @@
"""Utilities for the nn module."""

from typing import Any

import torch
from torch.distributed import DeviceMesh


def unpack_encoder_output(
    output_dict: dict[str, Any],
) -> tuple:
    """Unpack the output of an encoder.

    Args:
        output_dict (dict[str, Any]): The output of an encoder.

    Returns:
        tuple[TokensAndMasks, TokensAndMasks, dict[str, Any]]: The unpacked output.
    """
    latent = output_dict.pop("tokens_and_masks", None)
    latent_projected_and_pooled = output_dict.pop("project_aggregated", None)
    # Pass through all other outputs that might be specific to an encoder decoder pair
    # remove token_norm_stats
    output_dict.pop("token_norm_stats", None)
    decoder_kwargs = output_dict
    return latent, latent_projected_and_pooled, decoder_kwargs


def get_cumulative_sequence_lengths(seq_lengths: torch.Tensor) -> torch.Tensor:
    """Get the cumulative sequence lengths of a tensor.

    Args:
        seq_lengths (torch.Tensor): The sequence lengths of a tensor.

    Returns:
        torch.Tensor: The cumulative sequence lengths of a tensor.
    """
    return torch.cat(
        [
            torch.tensor([0], dtype=torch.int32, device=seq_lengths.device),
            torch.cumsum(
                seq_lengths.masked_select(seq_lengths != 0), 0, dtype=torch.int32
            ),
        ]
    )


# TODO: maybe this should just be functional or something
class DistributedMixins:
    """Mixin for distributed training."""

    def apply_ddp(
        self,
        dp_mesh: DeviceMesh | None = None,
        compile_enabled: bool = False,
        autograd_compile_enabled: bool = False,
        find_unused_parameters: bool = True,
    ) -> None:
        """Apply DDP to the model.

        .. warning::
            Usually this does not need to be called directly, as :meth:`TransformerConfig.build()`
            will call it for you.
        """
        from torch.distributed._composable.replicate import replicate

        # Adapted from
        # https://github.com/pytorch/torchtitan/blob/90c889e972b56b9faadebbb78fc985dedc537ed9/torchtitan/parallelisms/parallelize_llama.py#L328
        if compile_enabled:
            if autograd_compile_enabled:
                torch._dynamo.config.optimize_ddp = (
                    "python_reducer_without_compiled_forward"  # type: ignore
                )
            else:
                torch._dynamo.config.optimize_ddp = "ddp_optimizer"  # type: ignore
        # Forwards kwargs to torch DDP class, find_unused_parameters=True is required for MAE
        # Small performance hit could be possible for other models
        replicate(
            self,
            device_mesh=dp_mesh,
            bucket_cap_mb=100,
            find_unused_parameters=find_unused_parameters,
        )
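get_cumulative_sequence_lengths converts per-sequence token counts into offsets into a flattened token buffer, dropping zero-length sequences and prepending a 0; this is the cu_seqlens layout commonly expected by variable-length attention kernels. A worked example (not from the package):

# Worked example of get_cumulative_sequence_lengths.
import torch

from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.utils import (
    get_cumulative_sequence_lengths,
)

seq_lengths = torch.tensor([3, 0, 5, 2])
cu_seqlens = get_cumulative_sequence_lengths(seq_lengths)
# tensor([ 0,  3,  8, 10], dtype=torch.int32)
# The empty sequence is dropped; tokens for the i-th surviving sequence live
# in the flat range cu_seqlens[i]:cu_seqlens[i + 1].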
olmoearth_pretrain_minimal/olmoearth_pretrain_v1/olmoearth_pretrain_v1.py
@@ -0,0 +1,152 @@
"""OlmoEarth Pretrain v1 model initialization.

This module provides a simple interface to initialize OlmoEarth v1 models.
"""

from __future__ import annotations

from typing import Literal

import torch

from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.constants import Modality
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.flexi_vit import EncoderConfig, PredictorConfig
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.nn.latent_mim import LatentMIM, LatentMIMConfig

# Model size configurations matching the official OlmoEarth v1 models
MODEL_SIZE_CONFIGS = {
    "nano_shallow_decoder": {
        "decoder_depth": 4,
        "encoder_embedding_size": 128,
        "decoder_embedding_size": 128,
        "encoder_depth": 4,
        "encoder_num_heads": 8,
        "decoder_num_heads": 8,
        "mlp_ratio": 4.0,
    },
    "tiny_shallow_decoder": {
        "decoder_depth": 4,
        "encoder_embedding_size": 192,
        "decoder_embedding_size": 192,
        "encoder_depth": 12,
        "encoder_num_heads": 3,
        "decoder_num_heads": 3,
        "mlp_ratio": 4.0,
    },
    "base_shallow_decoder": {
        "decoder_depth": 4,
        "encoder_embedding_size": 768,
        "decoder_embedding_size": 768,
        "encoder_depth": 12,
        "encoder_num_heads": 12,
        "decoder_num_heads": 12,
        "mlp_ratio": 4.0,
    },
    "large_shallow_decoder": {
        "decoder_depth": 4,
        "encoder_embedding_size": 1024,
        "decoder_embedding_size": 1024,
        "encoder_depth": 24,
        "encoder_num_heads": 16,
        "decoder_num_heads": 16,
        "mlp_ratio": 4.0,
    },
}

# Default modalities used in OlmoEarth v1 training
DEFAULT_MODALITIES = [
    Modality.SENTINEL2_L2A.name,
    Modality.SENTINEL1.name,
    Modality.LANDSAT.name,
    Modality.WORLDCOVER.name,
    Modality.SRTM.name,
    Modality.OPENSTREETMAP_RASTER.name,
    Modality.WRI_CANOPY_HEIGHT_MAP.name,
    Modality.CDL.name,
    Modality.WORLDCEREAL.name,
]


class OlmoEarthPretrain_v1(torch.nn.Module):
    """OlmoEarth Pretrain v1 model.

    This class provides a simple interface to initialize OlmoEarth v1 models
    directly from the repository. Models are initialized with random weights.
    """

    def __init__(
        self,
        model_size: Literal["nano", "tiny", "base", "large"] = "nano",
        supported_modality_names: list[str] | None = None,
        max_patch_size: int = 8,
        max_sequence_length: int = 12,
        drop_path: float = 0.1,
    ) -> None:
        """Initialize an OlmoEarth Pretrain v1 model.

        Args:
            model_size: Size of the model. Options: "nano", "tiny", "base", "large".
            supported_modality_names: List of modality names to support. If None,
                uses the default modalities from OlmoEarth v1 training.
            max_patch_size: Maximum patch size for the encoder.
            max_sequence_length: Maximum sequence length.
            drop_path: Drop path rate for regularization.
        """
        super().__init__()

        # Map user-facing model size to internal config key with shallow_decoder suffix
        config_key = f"{model_size}_shallow_decoder"
        if config_key not in MODEL_SIZE_CONFIGS:
            raise ValueError(
                f"Invalid model_size: {model_size}. "
                f"Must be one of {['nano', 'tiny', 'base', 'large']}"
            )

        if supported_modality_names is None:
            supported_modality_names = DEFAULT_MODALITIES

        model_config = MODEL_SIZE_CONFIGS[config_key]

        # Build encoder config
        encoder_config = EncoderConfig(
            embedding_size=model_config["encoder_embedding_size"],
            num_heads=model_config["encoder_num_heads"],
            depth=model_config["encoder_depth"],
            mlp_ratio=model_config["mlp_ratio"],
            supported_modality_names=supported_modality_names,
            max_patch_size=max_patch_size,
            drop_path=drop_path,
            max_sequence_length=max_sequence_length,
        )

        # Build decoder config
        decoder_config = PredictorConfig(
            encoder_embedding_size=model_config["encoder_embedding_size"],
            decoder_embedding_size=model_config["decoder_embedding_size"],
            depth=model_config["decoder_depth"],
            mlp_ratio=model_config["mlp_ratio"],
            num_heads=model_config["decoder_num_heads"],
            supported_modality_names=supported_modality_names,
            max_sequence_length=max_sequence_length,
        )

        # Build model config and initialize the model
        model_config_obj = LatentMIMConfig(
            encoder_config=encoder_config,
            decoder_config=decoder_config,
        )

        self.model = model_config_obj.build()

    def forward(self, *args, **kwargs):
        """Forward pass through the model."""
        return self.model(*args, **kwargs)

    def __getattr__(self, name: str):
        """Delegate attribute access to the underlying model."""
        try:
            return super().__getattr__(name)
        except AttributeError:
            return getattr(self.model, name)
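Finally, a short usage sketch for the wrapper class. The import path is inferred from the wheel's file layout at the top of this diff; per the class docstring, weights are random at construction, and loading pretrained weights is presumably what the package's model_loader.py is for.

# Usage sketch: a randomly initialized "nano" model restricted to two
# modalities. Other sizes are "tiny", "base", and "large".
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.olmoearth_pretrain_v1 import (
    OlmoEarthPretrain_v1,
)
from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.constants import Modality

model = OlmoEarthPretrain_v1(
    model_size="nano",
    supported_modality_names=[Modality.SENTINEL2_L2A.name, Modality.SENTINEL1.name],
)
print(type(model.model).__name__)  # -> "LatentMIM"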