rslearn 0.0.3-py3-none-any.whl → 0.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +59 -0
- rslearn/data_sources/copernicus.py +4 -4
- rslearn/data_sources/earthdaily.py +21 -1
- rslearn/data_sources/gcp_public_data.py +3 -3
- rslearn/data_sources/utils.py +1 -17
- rslearn/main.py +10 -1
- rslearn/models/trunk.py +0 -144
- rslearn/train/callbacks/adapters.py +53 -0
- rslearn/train/callbacks/freeze_unfreeze.py +319 -0
- rslearn/train/callbacks/gradients.py +54 -34
- rslearn/train/data_module.py +70 -41
- rslearn/train/dataset.py +232 -54
- rslearn/train/lightning_module.py +4 -0
- rslearn/train/prediction_writer.py +7 -0
- rslearn/train/scheduler.py +15 -0
- rslearn/train/tasks/per_pixel_regression.py +259 -0
- rslearn/train/tasks/regression.py +6 -4
- rslearn/train/tasks/segmentation.py +44 -14
- rslearn/train/transforms/mask.py +69 -0
- rslearn/utils/geometry.py +8 -8
- {rslearn-0.0.3.dist-info → rslearn-0.0.4.dist-info}/METADATA +3 -3
- {rslearn-0.0.3.dist-info → rslearn-0.0.4.dist-info}/RECORD +26 -24
- rslearn/models/moe/distributed.py +0 -262
- rslearn/models/moe/soft.py +0 -676
- {rslearn-0.0.3.dist-info → rslearn-0.0.4.dist-info}/WHEEL +0 -0
- {rslearn-0.0.3.dist-info → rslearn-0.0.4.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.3.dist-info → rslearn-0.0.4.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.3.dist-info → rslearn-0.0.4.dist-info}/top_level.txt +0 -0
rslearn/arg_parser.py ADDED

```diff
@@ -0,0 +1,59 @@
+"""Custom Lightning ArgumentParser with environment variable substitution support."""
+
+import os
+import re
+from typing import Any
+
+from jsonargparse import Namespace
+from lightning.pytorch.cli import LightningArgumentParser
+
+
+def substitute_env_vars_in_string(content: str) -> str:
+    """Substitute environment variables in a string.
+
+    Replaces ${VAR_NAME} patterns with os.getenv(VAR_NAME, "") values.
+    This works on raw string content before YAML parsing.
+
+    Args:
+        content: The string content containing template variables
+
+    Returns:
+        The string with environment variables substituted
+    """
+    pattern = r"\$\{([^}]+)\}"
+
+    def replace_variable(match_obj: re.Match[str]) -> str:
+        var_name = match_obj.group(1)
+        env_value = os.getenv(var_name, "")
+        return env_value if env_value is not None else ""
+
+    return re.sub(pattern, replace_variable, content)
+
+
+class RslearnArgumentParser(LightningArgumentParser):
+    """Custom ArgumentParser that substitutes environment variables in config files.
+
+    This parser extends LightningArgumentParser to automatically substitute
+    ${VAR_NAME} patterns with environment variable values before parsing
+    configuration content. This allows config files to use environment
+    variables while still passing Lightning's validation.
+    """
+
+    def parse_string(
+        self,
+        cfg_str: str,
+        cfg_path: str | os.PathLike = "",
+        ext_vars: dict | None = None,
+        env: bool | None = None,
+        defaults: bool = True,
+        with_meta: bool | None = None,
+        **kwargs: Any,
+    ) -> Namespace:
+        """Pre-processes string for environment variable substitution before parsing."""
+        # Substitute environment variables in the config string before parsing
+        substituted_cfg_str = substitute_env_vars_in_string(cfg_str)
+
+        # Call the parent method with the substituted config
+        return super().parse_string(
+            substituted_cfg_str, cfg_path, ext_vars, env, defaults, with_meta, **kwargs
+        )
```
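Since `parse_string` runs the substitution before jsonargparse ever sees the YAML, config files passed to the CLI can reference environment variables directly. A minimal sketch of the substitution behavior (the `DS_ROOT` variable and the YAML snippet are made-up values for illustration):

```python
import os

from rslearn.arg_parser import substitute_env_vars_in_string

os.environ["DS_ROOT"] = "gs://my-bucket/dataset"  # hypothetical value
cfg_str = "data:\n  init_args:\n    path: ${DS_ROOT}/windows\n    name: ${UNSET_VAR}"

# ${DS_ROOT} is replaced with its value; unset variables become empty strings,
# since the replacement falls back to os.getenv(var_name, "").
print(substitute_env_vars_in_string(cfg_str))
# data:
#   init_args:
#     path: gs://my-bucket/dataset/windows
#     name:
```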
rslearn/data_sources/copernicus.py CHANGED

```diff
@@ -34,7 +34,7 @@ from rslearn.utils.geometry import (
     FloatBounds,
     STGeometry,
     flatten_shape,
-
+    split_shape_at_antimeridian,
 )
 from rslearn.utils.grid_index import GridIndex
 from rslearn.utils.raster_format import get_raster_projection_and_bounds
@@ -160,7 +160,7 @@ def get_sentinel2_tile_index() -> dict[str, list[FloatBounds]]:
     # issues where the tile bounds go from -180 to 180 longitude and thus match
     # with anything at the same latitude.
     union_shp = shapely.unary_union(shapes)
-    split_shapes = flatten_shape(
+    split_shapes = flatten_shape(split_shape_at_antimeridian(union_shp))
     bounds_list: list[FloatBounds] = []
     for shp in split_shapes:
         bounds_list.append(shp.bounds)
@@ -222,10 +222,10 @@ def get_sentinel2_tiles(geometry: STGeometry, cache_dir: UPath) -> list[str]:
     """
     tile_index = load_sentinel2_tile_index(cache_dir)
     wgs84_geometry = geometry.to_projection(WGS84_PROJECTION)
-    # If the shape is a collection, it could be cutting across
+    # If the shape is a collection, it could be cutting across antimeridian.
     # So we query each component shape separately and collect the results to avoid
     # issues.
-    # We assume the caller has already applied
+    # We assume the caller has already applied split_at_antimeridian.
     results = set()
     for shp in flatten_shape(wgs84_geometry.shp):
         for result in tile_index.query(shp.bounds):
```
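The reason for splitting is easiest to see from the bounds of a footprint that crosses 180° longitude: in WGS84 its bounding box spans nearly the full longitude range, so any bounds-based index query at that latitude would match it. A minimal shapely illustration (the coordinates are made up; this demonstrates the problem that `split_shape_at_antimeridian` addresses, not its implementation):

```python
import shapely

# A thin footprint crossing the antimeridian from 179°E to 179°W.
crossing = shapely.Polygon([(179, 0), (-179, 0), (-179, 1), (179, 1)])
print(crossing.bounds)  # (-179.0, 0.0, 179.0, 1.0): almost all longitudes

# Split into one piece per side of the antimeridian, the bounds become tight.
east = shapely.box(179, 0, 180, 1)
west = shapely.box(-180, 0, -179, 1)
print(east.bounds)  # (179.0, 0.0, 180.0, 1.0)
print(west.bounds)  # (-180.0, 0.0, -179.0, 1.0)
```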
rslearn/data_sources/earthdaily.py CHANGED

```diff
@@ -82,6 +82,8 @@ class EarthDaily(DataSource, TileStore):
         timeout: timedelta = timedelta(seconds=10),
         skip_items_missing_assets: bool = False,
         cache_dir: UPath | None = None,
+        max_retries: int = 3,
+        retry_backoff_factor: float = 5.0,
         service_name: Literal["platform"] = "platform",
     ):
         """Initialize a new EarthDaily instance.
@@ -99,6 +101,11 @@ class EarthDaily(DataSource, TileStore):
             cache_dir: optional directory to cache items by name, including asset URLs.
                 If not set, there will be no cache and instead STAC requests will be
                 needed each time.
+            max_retries: the maximum number of retry attempts for HTTP requests that fail
+                due to transient errors (e.g., 429, 500, 502, 503, 504 status codes).
+            retry_backoff_factor: backoff factor for exponential retry delays between HTTP
+                request attempts. The delay between retries is calculated using the formula:
+                `(retry_backoff_factor * (2 ** (retry_count - 1)))` seconds.
             service_name: the service name, only "platform" is supported, the other
                 services "legacy" and "internal" are not supported.
         """
@@ -110,6 +117,8 @@ class EarthDaily(DataSource, TileStore):
         self.timeout = timeout
         self.skip_items_missing_assets = skip_items_missing_assets
         self.cache_dir = cache_dir
+        self.max_retries = max_retries
+        self.retry_backoff_factor = retry_backoff_factor
         self.service_name = service_name
 
         if cache_dir is not None:
@@ -139,6 +148,12 @@ class EarthDaily(DataSource, TileStore):
         if "cache_dir" in d:
             kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
 
+        if "max_retries" in d:
+            kwargs["max_retries"] = d["max_retries"]
+
+        if "retry_backoff_factor" in d:
+            kwargs["retry_backoff_factor"] = d["retry_backoff_factor"]
+
         simple_optionals = ["query", "sort_by", "sort_ascending"]
         for k in simple_optionals:
             if k in d:
@@ -159,7 +174,12 @@ class EarthDaily(DataSource, TileStore):
         if self.eds_client is not None:
             return self.eds_client, self.client, self.collection
 
-        self.eds_client = EDSClient(
+        self.eds_client = EDSClient(
+            EDSConfig(
+                max_retries=self.max_retries,
+                retry_backoff_factor=self.retry_backoff_factor,
+            )
+        )
 
         if self.service_name == "platform":
             self.client = self.eds_client.platform.pystac_client
```
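With the new defaults (`max_retries=3`, `retry_backoff_factor=5.0`), the formula documented in the docstring yields waits of 5, 10, and 20 seconds before the three retries. A quick sanity check of that arithmetic:

```python
max_retries = 3
retry_backoff_factor = 5.0

# Delay before each retry, per the docstring:
# (retry_backoff_factor * (2 ** (retry_count - 1))) seconds.
delays = [
    retry_backoff_factor * (2 ** (retry_count - 1))
    for retry_count in range(1, max_retries + 1)
]
print(delays)  # [5.0, 10.0, 20.0]
```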
rslearn/data_sources/gcp_public_data.py CHANGED

```diff
@@ -26,7 +26,7 @@ from rslearn.data_sources.utils import match_candidate_items_to_window
 from rslearn.log_utils import get_logger
 from rslearn.tile_stores import TileStoreWithLayer
 from rslearn.utils.fsspec import join_upath, open_atomic
-from rslearn.utils.geometry import STGeometry, flatten_shape,
+from rslearn.utils.geometry import STGeometry, flatten_shape, split_at_antimeridian
 from rslearn.utils.raster_format import get_raster_projection_and_bounds
 
 from .copernicus import get_harmonize_callback, get_sentinel2_tiles
@@ -358,7 +358,7 @@ class Sentinel2(DataSource):
         shp = shapely.box(*bounds)
         sensing_time = row["sensing_time"]
         geometry = STGeometry(WGS84_PROJECTION, shp, (sensing_time, sensing_time))
-        geometry =
+        geometry = split_at_antimeridian(geometry)
 
         cloud_cover = float(row["cloud_cover"])
 
@@ -511,7 +511,7 @@ class Sentinel2(DataSource):
 
         time_range = (product_xml.start_time, product_xml.start_time)
         geometry = STGeometry(WGS84_PROJECTION, product_xml.shp, time_range)
-        geometry =
+        geometry = split_at_antimeridian(geometry)
 
         # Sometimes the geometry is not valid.
         # We just apply make_valid on it to correct issues.
```
rslearn/data_sources/utils.py CHANGED

```diff
@@ -256,23 +256,7 @@ def match_candidate_items_to_window(
             if item_geom.is_global():
                 item_geom = geometry
             else:
-
-                # So we first clip the item to the window bounds in the item's
-                # projection, then re-project the item to the window's projection.
-                buffered_window_geom = STGeometry(
-                    geometry.projection,
-                    geometry.shp.buffer(1),
-                    geometry.time_range,
-                )
-                window_shp_in_item_proj = buffered_window_geom.to_projection(
-                    item_geom.projection
-                ).shp
-                clipped_item_geom = STGeometry(
-                    item_geom.projection,
-                    item_geom.shp.intersection(window_shp_in_item_proj),
-                    item_geom.time_range,
-                )
-                item_geom = clipped_item_geom.to_projection(geometry.projection)
+                item_geom = item_geom.to_projection(geometry.projection)
             item_shps.append(item_geom.shp)
 
         if query_config.space_mode == SpaceMode.CONTAINS:
```
rslearn/main.py CHANGED

```diff
@@ -13,6 +13,7 @@ from lightning.pytorch.cli import LightningArgumentParser, LightningCLI
 from rasterio.crs import CRS
 from upath import UPath
 
+from rslearn.arg_parser import RslearnArgumentParser
 from rslearn.config import LayerConfig
 from rslearn.const import WGS84_EPSG
 from rslearn.data_sources import Item, data_source_from_config
@@ -779,7 +780,7 @@ def dataset_build_index() -> None:
 
 
 class RslearnLightningCLI(LightningCLI):
-    """LightningCLI that links data.tasks to model.tasks."""
+    """LightningCLI that links data.tasks to model.tasks and supports environment variables."""
 
     def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None:
         """Link data.tasks to model.tasks.
@@ -787,6 +788,7 @@ class RslearnLightningCLI(LightningCLI):
         Args:
             parser: the argument parser
         """
+        # Link data.tasks to model.tasks
         parser.link_arguments(
             "data.init_args.task", "model.init_args.task", apply_on="instantiate"
         )
@@ -815,6 +817,12 @@ class RslearnLightningCLI(LightningCLI):
         # sampler as needed.
         c.trainer.use_distributed_sampler = False
 
+        # For predict, make sure that return_predictions is False.
+        # Otherwise all the predictions would be stored in memory which can lead to
+        # high memory consumption.
+        if subcommand == "predict":
+            c.return_predictions = False
+
 
 def model_handler() -> None:
     """Handler for any rslearn model X commands."""
@@ -825,6 +833,7 @@ def model_handler() -> None:
         subclass_mode_model=True,
         subclass_mode_data=True,
         save_config_kwargs={"overwrite": True},
+        parser_class=RslearnArgumentParser,
     )
```
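The `return_predictions` change relies on standard Lightning behavior: when a prediction writer callback persists the outputs (as rslearn's `prediction_writer.py` does), `Trainer.predict` does not also need to accumulate them in memory. A minimal, self-contained sketch of that behavior (`TinyModel` and the random data are illustrative, not part of rslearn):

```python
import torch
from lightning.pytorch import LightningModule, Trainer
from torch.utils.data import DataLoader, TensorDataset


class TinyModel(LightningModule):
    def __init__(self) -> None:
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def predict_step(self, batch, batch_idx):
        (x,) = batch
        return self.layer(x)


loader = DataLoader(TensorDataset(torch.randn(8, 4)), batch_size=4)
trainer = Trainer(logger=False, enable_checkpointing=False)
out = trainer.predict(TinyModel(), loader, return_predictions=False)
print(out)  # None: predictions are not accumulated in memory
```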
rslearn/models/trunk.py CHANGED

```diff
@@ -6,7 +6,6 @@ from typing import Any
 import torch
 
 from rslearn.log_utils import get_logger
-from rslearn.models.moe.soft import SoftMoE
 from rslearn.models.task_embedding import BaseTaskEmbedding
 
 logger = get_logger(__name__)
@@ -135,146 +134,3 @@ class DecoderTrunk(torch.nn.Module):
         """
         for layer in self.layers:
             layer.apply_auxiliary_losses(trunk_out, outs)
-
-
-class MoETransformer(DecoderTrunkLayer):
-    """Transformer for decoder trunk."""
-
-    def __init__(
-        self,
-        dim: int,
-        n_layers: int,
-        n_heads: int,
-        mlp_dim: int = 512,
-        dropout: float = 0.1,
-        task_moe: bool = False,
-        disable_moe: bool = False,
-        num_experts: int = 16,
-        num_slots: int = 256,
-        expert_mult: int = 4,
-        load_balance_loss_weight: float = 0.0,
-    ):
-        """Standard ViT-style transformer, with soft MoE.
-
-        Since the point of the MoE layers is to deal with task-specific and task-shared
-        features (and not to route specific tokens), it's probably best to use max_seq_len
-        as the number of slots, and have at least one expert per task (probably more).
-
-        Args:
-            dim: dimension of the input and output
-            n_layers: number of transformer blocks
-            n_heads: number of attention heads
-            mlp_dim: dimension of the MLP
-            dropout: dropout rate
-            task_moe: if specified, compute dispatch weights given the task embedding
-                only, and not the token
-            disable_moe: if True, disable MoE
-            num_experts: number of experts in soft MoE
-            num_slots: number of slots in soft MoE
-            expert_mult: factor by which to multiply mlp_dim in the hidden layer of experts
-            load_balance_loss_weight: weight of the load balance loss
-        """
-        super().__init__()
-        self.disable_moe = disable_moe
-        self.num_experts = num_experts
-        self.num_slots = num_slots
-        self.task_moe = task_moe
-        self.load_balance_loss_weight = load_balance_loss_weight
-        self.norm = torch.nn.LayerNorm(dim)
-        self.layers = torch.nn.ModuleList([])
-        for _ in range(n_layers):
-            mha = torch.nn.MultiheadAttention(
-                dim, n_heads, dropout=dropout, batch_first=True
-            )
-            if not disable_moe:
-                ffn = SoftMoE(
-                    dim=dim,
-                    num_experts=num_experts,
-                    num_slots=num_slots,
-                    dropout=dropout,
-                    expert_mult=expert_mult,
-                )
-            else:
-                ffn = torch.nn.Sequential(
-                    torch.nn.LayerNorm(dim),
-                    torch.nn.Linear(dim, mlp_dim),
-                    torch.nn.GELU(),
-                    torch.nn.Linear(mlp_dim, dim),
-                )
-            drop = torch.nn.Dropout(dropout)
-            self.layers.append(torch.nn.ModuleList([mha, ffn, drop]))
-
-    def forward(
-        self, x: torch.Tensor, task_embedding: torch.Tensor | None = None
-    ) -> dict[str, torch.Tensor]:
-        """Forward pass.
-
-        Args:
-            x: input tensor of shape (batch_size, seq_len, dim)
-            task_embedding: task embedding tensor of shape (batch_size, dim)
-
-        Returns:
-            dict with key "outputs" (output tensor of shape (batch_size, seq_len, dim))
-            and optionally "load_balance_loss", "dispatch_weights", and "combine_weights".
-        """
-        # Forward pass through the transformer
-        infos: list[dict[str, Any]] = []
-        for mha, ffn, drop in self.layers:
-            x = mha(x, x, x)[0] + x
-            if not self.disable_moe:
-                outs = ffn(x, weight_key=task_embedding if self.task_moe else None)
-                x_ffn = outs.pop("outputs")
-                infos.append(outs)
-                x = drop(x_ffn + x)
-            else:
-                x = drop(ffn(x) + x)
-        x = self.norm(x)
-        outputs = {"outputs": x}
-
-        # If using MoE, collect expert weights and auxiliary losses
-        # Don't call detach because we will use this later on in the loss collation
-        if not self.disable_moe:
-            collated: dict[str, list[torch.Tensor]] = {
-                "load_balance_loss": [],
-                "dispatch_weights": [],
-                "combine_weights": [],
-            }
-            for info in infos:
-                for k, v in info.items():
-                    if k == "dispatch_weights":
-                        # each weight is [batch, seq_len, num_experts, num_slots]
-                        # compute avg weight per token across slot/batch/expert
-                        # NOTE: this is probably about the same across all tokens,
-                        # assuming all tokens get looked at by a few experts
-                        collated["dispatch_weights"].append(v.mean((0, 2, 3)))
-
-                    elif k == "combine_weights":
-                        # each weight is [batch, seq_len, num_experts * num_slots]
-                        # compute avg weight per expert (slot group) across batch/seq
-                        v = v.unflatten(-1, (self.num_experts, self.num_slots))
-                        v = v.sum(-1)  # [batch, seq_len, num_experts (softmax)]
-                        collated["combine_weights"].append(v.mean((0, 1)))
-
-                    elif k == "load_balance_loss":
-                        # each load balance loss per layer is a scalar
-                        collated["load_balance_loss"].append(v)
-            outputs.update(collated)
-
-        return outputs
-
-    def apply_auxiliary_losses(
-        self, trunk_out: dict[str, Any], outs: dict[str, Any]
-    ) -> None:
-        """Apply auxiliary losses in-place.
-
-        Just move the load balance loss to the loss dict, where it will eventually be summed.
-
-        Args:
-            trunk_out: The output of the trunk.
-            outs: The output of the decoders, with key "loss_dict" containing the losses.
-        """
-        if "load_balance_loss" in trunk_out and self.load_balance_loss_weight > 0.0:
-            total_aux_loss = torch.stack(trunk_out["load_balance_loss"]).mean()
-            outs["loss_dict"]["load_balance_loss"] = (
-                self.load_balance_loss_weight * total_aux_loss
-            )
```
rslearn/train/callbacks/adapters.py ADDED

```diff
@@ -0,0 +1,53 @@
+"""Callback to activate/deactivate adapter layers."""
+
+from typing import Any
+
+from lightning.pytorch import LightningModule
+from lightning.pytorch.callbacks import Callback
+from lightning.pytorch.trainer import Trainer
+
+from rslearn.log_utils import get_logger
+
+logger = get_logger(__name__)
+
+
+class ActivateLayers(Callback):
+    """Activates adapter layers on a given epoch.
+
+    By default, at every epoch, every adapter layer is deactivated.
+    To activate an adapter layer, add a selector with the name of the adapter layer
+    and the epoch at which to activate it. Once an adapter layer is activated, it
+    remains active until the end of training.
+    """
+
+    def __init__(self, selectors: list[dict[str, Any]]) -> None:
+        """Initialize the callback.
+
+        Args:
+            selectors: List of selectors to activate.
+                Each selector is a dictionary with the following keys:
+                - "name": Substring selector of modules to activate (str).
+                - "at_epoch": The epoch at which to activate (int).
+        """
+        self.selectors = selectors
+
+    def on_train_epoch_start(
+        self,
+        trainer: Trainer,
+        pl_module: LightningModule,
+    ) -> None:
+        """Activate adapter layers on a given epoch.
+
+        Adapter layers are activated/deactivated by setting the `active` attribute.
+
+        Args:
+            trainer: The trainer object.
+            pl_module: The LightningModule object.
+        """
+        status = {}
+        for name, module in pl_module.named_modules():
+            for selector in self.selectors:
+                if selector["name"] in name:
+                    module.active = trainer.current_epoch >= selector["at_epoch"]
+                    status[selector["name"]] = "active" if module.active else "inactive"
+        logger.info(f"Updated adapter status: {status}")
```