rslearn 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
+++ rslearn/models/olmoearth_pretrain/__init__.py
@@ -0,0 +1 @@
+ """OlmoEarth model architecture."""
+++ rslearn/models/olmoearth_pretrain/model.py
@@ -0,0 +1,203 @@
+ """OlmoEarth model wrapper for fine-tuning in rslearn."""
+
+ import json
+ from contextlib import nullcontext
+ from typing import Any
+
+ import torch
+ from einops import rearrange
+ from olmo_core.config import Config
+ from olmo_core.distributed.checkpoint import load_model_and_optim_state
+ from olmoearth_pretrain.data.constants import Modality
+ from olmoearth_pretrain.nn.flexihelios import Encoder, TokensAndMasks
+ from olmoearth_pretrain.train.masking import MaskedOlmoEarthSample, MaskValue
+ from upath import UPath
+
+ from rslearn.log_utils import get_logger
+
+ logger = get_logger(__name__)
+
+ MODALITY_NAMES = [
+     "sentinel2_l2a",
+     "sentinel1",
+     "worldcover",
+     "openstreetmap_raster",
+     "landsat",
+ ]
+
+ AUTOCAST_DTYPE_MAP = {
+     "bfloat16": torch.bfloat16,
+     "float16": torch.float16,
+     "float32": torch.float32,
+ }
+
+
+ class OlmoEarth(torch.nn.Module):
+     """A wrapper to support the OlmoEarth model."""
+
+     def __init__(
+         self,
+         # TODO: we should accept a model ID instead of checkpoint_path once we are
+         # closer to being ready for release.
+         checkpoint_path: str,
+         selector: list[str | int] = [],
+         forward_kwargs: dict[str, Any] = {},
+         random_initialization: bool = False,
+         embedding_size: int | None = None,
+         patch_size: int | None = None,
+         autocast_dtype: str | None = "bfloat16",
+     ):
+         """Create a new OlmoEarth model.
+
+         Args:
+             checkpoint_path: the checkpoint directory to load. It should contain a
+                 config.json file as well as a model_and_optim folder.
+             selector: an optional sequence of attribute names or list indices to
+                 select the sub-module that should be applied on the input images.
+             forward_kwargs: additional arguments to pass to the forward pass besides
+                 the MaskedOlmoEarthSample.
+             random_initialization: whether to skip loading the checkpoint so the
+                 weights are randomly initialized. In this case, the checkpoint is only
+                 used to define the model architecture.
+             embedding_size: optional embedding size to report via
+                 get_backbone_channels.
+             patch_size: optional patch size to report via get_backbone_channels.
+             autocast_dtype: which dtype to use for autocasting, or None to disable.
+         """
+         super().__init__()
+         _checkpoint_path = UPath(checkpoint_path)
+         self.forward_kwargs = forward_kwargs
+         self.embedding_size = embedding_size
+         self.patch_size = patch_size
+
+         if autocast_dtype is not None:
+             self.autocast_dtype = AUTOCAST_DTYPE_MAP[autocast_dtype]
+         else:
+             self.autocast_dtype = None
+
+         # Load the model config and initialize it.
+         # We avoid loading the train module here because it depends on running
+         # within olmo_core.
+         with (_checkpoint_path / "config.json").open() as f:
+             config_dict = json.load(f)
+         model_config = Config.from_dict(config_dict["model"])
+
+         model = model_config.build()
+
+         # Load the checkpoint.
+         if not random_initialization:
+             train_module_dir = _checkpoint_path / "model_and_optim"
+             if train_module_dir.exists():
+                 load_model_and_optim_state(str(train_module_dir), model)
+                 logger.info(f"loaded OlmoEarth encoder from {train_module_dir}")
+             else:
+                 logger.info(f"could not find OlmoEarth encoder at {train_module_dir}")
+         else:
+             logger.info("skipping loading OlmoEarth encoder")
+
+         # Select just the portion of the model that we actually want to use.
+         for part in selector:
+             if isinstance(part, str):
+                 model = getattr(model, part)
+             else:
+                 model = model[part]
+         self.model = model
+
+     def forward(self, inputs: list[dict[str, Any]]) -> list[torch.Tensor]:
+         """Compute feature maps from the OlmoEarth backbone.
+
+         Args:
+             inputs: input dicts. Each should include keys corresponding to the
+                 modalities that should be passed to the OlmoEarth model.
+         """
+         kwargs = {}
+         present_modalities = []
+         device = None
+         # Handle the case where some modalities are multitemporal and some are not.
+         # We assume all multitemporal modalities have the same number of timesteps.
+         max_timesteps = 1
+         for modality in MODALITY_NAMES:
+             if modality not in inputs[0]:
+                 continue
+             present_modalities.append(modality)
+             cur = torch.stack([inp[modality] for inp in inputs], dim=0)
+             device = cur.device
+             # Check whether it is single- or multi-temporal, and reshape accordingly.
+             num_bands = Modality.get(modality).num_bands
+             num_timesteps = cur.shape[1] // num_bands
+             max_timesteps = max(max_timesteps, num_timesteps)
+             cur = rearrange(cur, "b (t c) h w -> b h w t c", t=num_timesteps)
+             kwargs[modality] = cur
+             # Create the mask array, which is BHWTS (without channels but with band sets).
+             num_band_sets = len(Modality.get(modality).band_sets)
+             mask_shape = cur.shape[0:4] + (num_band_sets,)
+             mask = (
+                 torch.ones(mask_shape, dtype=torch.int32, device=device)
+                 * MaskValue.ONLINE_ENCODER.value
+             )
+             kwargs[f"{modality}_mask"] = mask
+
+         # Timestamps are required.
+         # Note that only months (0 to 11) are used in the OlmoEarth position encoding.
+         # For now, we assign the same timestamps to all inputs, but later we should handle varying timestamps per input.
+         timestamps = torch.zeros(
+             (len(inputs), max_timesteps, 3), dtype=torch.int32, device=device
+         )
+         timestamps[:, :, 0] = 1  # day
+         timestamps[:, :, 1] = torch.arange(max_timesteps, device=device)[
+             None, :
+         ]  # month
+         timestamps[:, :, 2] = 2024  # year
+         kwargs["timestamps"] = timestamps
+
+         sample = MaskedOlmoEarthSample(**kwargs)
+
+         # Decide the autocast context based on self.autocast_dtype.
+         if self.autocast_dtype is None:
+             context = nullcontext()
+         else:
+             assert device is not None
+             context = torch.amp.autocast(
+                 device_type=device.type, dtype=self.autocast_dtype
+             )
+
+         with context:
+             # Currently we assume the provided model always returns a TokensAndMasks object.
+             tokens_and_masks: TokensAndMasks
+             if isinstance(self.model, Encoder):
+                 # Encoder has a fast_pass argument to indicate that the mask is not needed.
+                 tokens_and_masks = self.model(
+                     sample, fast_pass=True, **self.forward_kwargs
+                 )["tokens_and_masks"]
+             else:
+                 # Other models like STEncoder do not support this option.
+                 tokens_and_masks = self.model(sample, **self.forward_kwargs)[
+                     "tokens_and_masks"
+                 ]
+
+         # Apply temporal/modality pooling so we have just one feature per patch.
+         features = []
+         for modality in present_modalities:
+             modality_features = getattr(tokens_and_masks, modality)
+             # Pool over band sets and timesteps (BHWTSC -> BHWC).
+             pooled = modality_features.mean(dim=[3, 4])
+             # We want BHWC -> BCHW.
+             pooled = rearrange(pooled, "b h w c -> b c h w")
+             features.append(pooled)
+         # Pool over the modalities, so we get one BCHW feature map.
+         pooled = torch.stack(features, dim=0).mean(dim=0)
+         return [pooled]
+
+     def get_backbone_channels(self) -> list:
+         """Returns the output channels of this model when used as a backbone.
+
+         The output channels are a list of (downsample_factor, depth) tuples that
+         correspond to the feature maps that the backbone returns. For example, an
+         element [2, 32] indicates that the corresponding feature map is at 1/2 the
+         input resolution and has 32 channels.
+
+         Returns:
+             the output channels of the backbone as a list of (downsample_factor,
+             depth) tuples.
+         """
+         return [(self.patch_size, self.embedding_size)]
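
To make the new wrapper concrete, here is a minimal usage sketch. It is not taken from the package: the checkpoint path, the "encoder" selector, the embedding/patch sizes, and the 12-band Sentinel-2 shape are all illustrative assumptions, and running it requires the olmoearth_pretrain and olmo_core dependencies plus a real checkpoint directory.

```python
# Sketch only: paths, selector, band count, and shapes below are hypothetical.
import torch

from rslearn.models.olmoearth_pretrain.model import OlmoEarth

model = OlmoEarth(
    checkpoint_path="/path/to/olmoearth_checkpoint",  # hypothetical checkpoint dir
    selector=["encoder"],  # hypothetical attribute on the built model
    embedding_size=768,  # reported via get_backbone_channels()
    patch_size=8,
)
model.eval()

# One dict per batch element; multitemporal inputs are stacked along the
# channel dim as (T*C, H, W). Here: 4 timesteps, 12 bands (assumed), 64x64.
inputs = [{"sentinel2_l2a": torch.randn(4 * 12, 64, 64)}]
with torch.no_grad():
    features = model(inputs)  # list containing one BCHW feature map
```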
+++ rslearn/models/olmoearth_pretrain/norm.py
@@ -0,0 +1,84 @@
+ """Normalization transforms."""
+
+ import json
+ from typing import Any
+
+ from olmoearth_pretrain.data.normalize import load_computed_config
+
+ from rslearn.log_utils import get_logger
+ from rslearn.train.transforms.transform import Transform
+
+ logger = get_logger(__file__)
+
+
+ class OlmoEarthNormalize(Transform):
+     """Normalize using the OlmoEarth JSON config.
+
+     For Sentinel-1 data, the values should be converted to decibels before being
+     passed to this transform.
+     """
+
+     def __init__(
+         self,
+         band_names: dict[str, list[str]],
+         std_multiplier: float | None = 2,
+         config_fname: str | None = None,
+     ) -> None:
+         """Initialize a new OlmoEarthNormalize.
+
+         Args:
+             band_names: map from modality name to the list of bands in that modality
+                 in the order they are being loaded. Note that this order must match
+                 the expected order for the OlmoEarth model.
+             std_multiplier: the std multiplier matching the one used for model
+                 training in OlmoEarth.
+             config_fname: load the normalization configuration from this file,
+                 instead of getting it from OlmoEarth.
+         """
+         super().__init__()
+         self.band_names = band_names
+         self.std_multiplier = std_multiplier
+
+         if config_fname is None:
+             self.norm_config = load_computed_config()
+         else:
+             logger.warning(
+                 f"Loading normalization config from {config_fname}. This argument is deprecated and will be removed in a future version."
+             )
+             with open(config_fname) as f:
+                 self.norm_config = json.load(f)
+
+     def forward(
+         self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+     ) -> tuple[dict[str, Any], dict[str, Any]]:
+         """Apply normalization over the inputs and targets.
+
+         Args:
+             input_dict: the input
+             target_dict: the target
+
+         Returns:
+             normalized (input_dict, target_dict) tuple
+         """
+         for modality_name, cur_band_names in self.band_names.items():
+             band_norms = self.norm_config[modality_name]
+             image = input_dict[modality_name]
+             # Keep a set of indices to make sure that we normalize all of them.
+             needed_band_indices = set(range(image.shape[0]))
+             num_timesteps = image.shape[0] // len(cur_band_names)
+
+             for band, norm_dict in band_norms.items():
+                 # If multitemporal, normalize each timestep separately.
+                 for t in range(num_timesteps):
+                     band_idx = cur_band_names.index(band) + t * len(cur_band_names)
+                     min_val = norm_dict["mean"] - self.std_multiplier * norm_dict["std"]
+                     max_val = norm_dict["mean"] + self.std_multiplier * norm_dict["std"]
+                     image[band_idx] = (image[band_idx] - min_val) / (max_val - min_val)
+                     needed_band_indices.remove(band_idx)
+
+             if len(needed_band_indices) > 0:
+                 raise ValueError(
+                     f"for modality {modality_name}, bands {needed_band_indices} were unexpectedly not normalized"
+                 )
+
+         return input_dict, target_dict
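
The normalization itself is a linear rescaling per band: a value at mean - std_multiplier * std maps to 0 and a value at mean + std_multiplier * std maps to 1. A minimal sketch, assuming a hypothetical config file and band list (the real statistics come from load_computed_config()):

```python
# Sketch only: the band list, file name, and statistics are made up for
# illustration; real values come from the OlmoEarth computed config.
import json

import torch

from rslearn.models.olmoearth_pretrain.norm import OlmoEarthNormalize

# Hypothetical config: one entry per modality, one (mean, std) per band.
config = {"sentinel2_l2a": {"B02": {"mean": 1000.0, "std": 500.0}}}
with open("norm_config.json", "w") as f:
    json.dump(config, f)

transform = OlmoEarthNormalize(
    band_names={"sentinel2_l2a": ["B02"]},
    std_multiplier=2,
    config_fname="norm_config.json",  # deprecated; omit to use the built-in config
)

# With mean=1000, std=500, std_multiplier=2: min=0, max=2000, so a raw value
# of 1000 is mapped to (1000 - 0) / (2000 - 0) = 0.5.
input_dict = {"sentinel2_l2a": torch.full((1, 32, 32), 1000.0)}
input_dict, _ = transform.forward(input_dict, {})
```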
+++ rslearn/models/pooling_decoder.py
@@ -76,3 +76,46 @@ class PoolingDecoder(torch.nn.Module):
          features = torch.amax(features, dim=(2, 3))
          features = self.fc_layers(features)
          return self.output_layer(features)
+
+
+ class SegmentationPoolingDecoder(PoolingDecoder):
+     """Like PoolingDecoder, but copies the output to all pixels.
+
+     This allows the model to produce a global output while still being compatible
+     with SegmentationTask. This only makes sense for very small windows, since the
+     output probabilities will be the same at all pixels. The main use case is to
+     train for a classification-like task on small windows, but still produce a
+     raster during inference on large windows.
+     """
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         image_key: str = "image",
+         **kwargs: Any,
+     ):
+         """Create a new SegmentationPoolingDecoder.
+
+         Args:
+             in_channels: input channels (channels in the last feature map passed to
+                 this module)
+             out_channels: channels for the output flat feature vector
+             image_key: the key in inputs for the image from which the expected
+                 width and height are derived.
+             kwargs: other arguments to pass to PoolingDecoder.
+         """
+         super().__init__(in_channels=in_channels, out_channels=out_channels, **kwargs)
+         self.image_key = image_key
+
+     def forward(
+         self, features: list[torch.Tensor], inputs: list[dict[str, Any]]
+     ) -> torch.Tensor:
+         """Extend PoolingDecoder forward to broadcast the output to a segmentation mask.
+
+         This only works when all of the pixels have the same segmentation target.
+         """
+         output_probs = super().forward(features, inputs)
+         # BC -> BCHW
+         h, w = inputs[0][self.image_key].shape[1:3]
+         return output_probs[:, :, None, None].repeat([1, 1, h, w])
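
The key step in SegmentationPoolingDecoder.forward is the final broadcast: a per-image output vector of shape BC is expanded to a BCHW raster whose pixels all carry the same values. A standalone sketch of just that tensor operation:

```python
# Standalone sketch of the broadcast performed by SegmentationPoolingDecoder:
# a per-image class vector (BC) is copied to every pixel to form a BCHW mask.
import torch

batch, classes, h, w = 2, 3, 4, 4
output_probs = torch.randn(batch, classes)  # global per-image logits

mask = output_probs[:, :, None, None].repeat([1, 1, h, w])
assert mask.shape == (batch, classes, h, w)
# Every pixel carries the same logits as the global prediction.
assert torch.equal(mask[:, :, 0, 0], output_probs)
```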
+++ rslearn-0.0.11.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rslearn
- Version: 0.0.9
+ Version: 0.0.11
  Summary: A library for developing remote sensing datasets and models
  Author: OlmoEarth Team
  License: Apache License
@@ -243,6 +243,7 @@ Requires-Dist: planetary_computer>=1.0; extra == "extra"
  Requires-Dist: pycocotools>=2.0; extra == "extra"
  Requires-Dist: pystac_client>=0.9; extra == "extra"
  Requires-Dist: rtree>=1.4; extra == "extra"
+ Requires-Dist: termcolor>=3.0; extra == "extra"
  Requires-Dist: satlaspretrain_models>=0.3; extra == "extra"
  Requires-Dist: scipy>=1.16; extra == "extra"
  Requires-Dist: terratorch>=1.0.2; extra == "extra"
+++ rslearn-0.0.11.dist-info/RECORD
@@ -57,7 +57,7 @@ rslearn/models/molmo.py,sha256=mVrARBhZciMzOgOOjGB5AHlPIf2iO9IBSJmdyKSl1L8,2061
  rslearn/models/multitask.py,sha256=j2Kiwj_dUiUp_CIUr25bS8HiyeoFlr1PGqjTfpgIGLc,14672
  rslearn/models/panopticon.py,sha256=woNEs53wVc5D-NxbSDEPRZ_mYe8vllnuldmADjvhfDQ,5806
  rslearn/models/pick_features.py,sha256=y8e4tJFhyG7ZuVSElWhQ5-Aer4ZKJCEH9wLGJU7WqGI,1551
- rslearn/models/pooling_decoder.py,sha256=jZfEQCfthfa21C9sEjgFHUcfhHMVlvG7_nDMw_1FLaE,2727
+ rslearn/models/pooling_decoder.py,sha256=unr2fSE_QmJHPi3dKtopqMtb1Kn-2h94LgwwAVP9vZg,4437
  rslearn/models/prithvi.py,sha256=SVM3ypJlVTkXQ69pPhB4UeJr87VnmADTCuyV365dbkU,39961
  rslearn/models/registry.py,sha256=yCcrOvLkbn07Xtln1j7hAB_kmGw0MGsiR2TloJq9Bmk,504
  rslearn/models/resize_features.py,sha256=asKXWrLHIBrU6GaAV0Ory9YuK7IK104XjhkB4ljzI3A,1289
@@ -93,6 +93,9 @@ rslearn/models/detr/util.py,sha256=NMHhHbkIo7PoBUVbDqa2ZknJBTswmaxFCGHrPtFKnGg,6
  rslearn/models/galileo/__init__.py,sha256=QQa0C29nuPRva0KtGiMHQ2ZB02n9SSwj_wqTKPz18NM,112
  rslearn/models/galileo/galileo.py,sha256=jUHA64YvVC3Fz5fevc_9dFJfZaINODRDrhSGLIiOZcw,21115
  rslearn/models/galileo/single_file_galileo.py,sha256=l5tlmmdr2eieHNH-M7rVIvcptkv0Fuk3vKXFW691ezA,56143
+ rslearn/models/olmoearth_pretrain/__init__.py,sha256=AjRvbjBdadCdPh-EdvySH76sVAQ8NGQaJt11Tsn1D5I,36
+ rslearn/models/olmoearth_pretrain/model.py,sha256=F-B1ym9UZuTPJ0OY15Jwb1TkNtr_EtAUlqI-tr_Z2uo,8352
+ rslearn/models/olmoearth_pretrain/norm.py,sha256=rHjFyWkpNLYMx9Ow7TsU-jGm9Sjx7FVf0p4R__ohx2c,3266
  rslearn/models/panopticon_data/sensors/drone.yaml,sha256=xqWS-_QMtJyRoWXJm-igoSur9hAmCFdqkPin8DT5qpw,431
  rslearn/models/panopticon_data/sensors/enmap.yaml,sha256=b2j6bSgYR2yKR9DRm3SPIzSVYlHf51ny_p-1B4B9sB4,13431
  rslearn/models/panopticon_data/sensors/goes.yaml,sha256=o00aoWCYqam0aB1rPmXq1MKe8hsKak_qyBG7BPL27Sc,152
@@ -156,9 +159,9 @@ rslearn/utils/spatial_index.py,sha256=eomJAUgzmjir8j9HZnSgQoJHwN9H0wGTjmJkMkLLfs
  rslearn/utils/sqlite_index.py,sha256=YGOJi66544e6JNtfSft6YIlHklFdSJO2duxQ4TJ2iu4,2920
  rslearn/utils/time.py,sha256=2ilSLG94_sxLP3y5RSV5L5CG8CoND_dbdzYEHVtN-I8,387
  rslearn/utils/vector_format.py,sha256=EIChYCL6GLOILS2TO2JBkca1TuaWsSubWv6iRS3P2ds,16139
- rslearn-0.0.9.dist-info/licenses/LICENSE,sha256=_99ZWPoLdlUbqZoSC5DF4ihiNwl5rTEmBaq2fACecdg,11352
- rslearn-0.0.9.dist-info/METADATA,sha256=6BV8wt9tuo94FkoKjR3RcF3AbKNbU3IodkJtK4tASkE,36248
- rslearn-0.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- rslearn-0.0.9.dist-info/entry_points.txt,sha256=doTBQ57NT7nq-dgYGgTTw6mafcGWb_4PWYtYR4rGm50,46
- rslearn-0.0.9.dist-info/top_level.txt,sha256=XDKo90WBH8P9RQumHxo0giLJsoufT4r9odv-WE6Ahk4,8
- rslearn-0.0.9.dist-info/RECORD,,
+ rslearn-0.0.11.dist-info/licenses/LICENSE,sha256=_99ZWPoLdlUbqZoSC5DF4ihiNwl5rTEmBaq2fACecdg,11352
+ rslearn-0.0.11.dist-info/METADATA,sha256=jwB0ZZ-oLa1Y_1iuZRKCQoB4i3kOFDJ0xSeMTJP7zww,36297
+ rslearn-0.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ rslearn-0.0.11.dist-info/entry_points.txt,sha256=doTBQ57NT7nq-dgYGgTTw6mafcGWb_4PWYtYR4rGm50,46
+ rslearn-0.0.11.dist-info/top_level.txt,sha256=XDKo90WBH8P9RQumHxo0giLJsoufT4r9odv-WE6Ahk4,8
+ rslearn-0.0.11.dist-info/RECORD,,