PyPI - rslearn - Versions diffs - 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl - Mend

rslearn 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

rslearn/arg_parser.py +2 -9
rslearn/config/__init__.py +2 -0
rslearn/config/dataset.py +64 -20
rslearn/dataset/add_windows.py +1 -1
rslearn/dataset/dataset.py +34 -84
rslearn/dataset/materialize.py +5 -5
rslearn/dataset/storage/__init__.py +1 -0
rslearn/dataset/storage/file.py +202 -0
rslearn/dataset/storage/storage.py +140 -0
rslearn/dataset/window.py +26 -80
rslearn/lightning_cli.py +22 -11
rslearn/main.py +12 -37
rslearn/models/anysat.py +11 -9
rslearn/models/attention_pooling.py +177 -0
rslearn/models/clay/clay.py +8 -9
rslearn/models/clip.py +18 -15
rslearn/models/component.py +111 -0
rslearn/models/concatenate_features.py +21 -11
rslearn/models/conv.py +15 -8
rslearn/models/croma.py +13 -8
rslearn/models/detr/detr.py +25 -14
rslearn/models/dinov3.py +11 -6
rslearn/models/faster_rcnn.py +19 -9
rslearn/models/feature_center_crop.py +12 -9
rslearn/models/fpn.py +19 -8
rslearn/models/galileo/galileo.py +23 -18
rslearn/models/module_wrapper.py +26 -57
rslearn/models/molmo.py +16 -14
rslearn/models/multitask.py +102 -73
rslearn/models/olmoearth_pretrain/model.py +135 -38
rslearn/models/panopticon.py +8 -7
rslearn/models/pick_features.py +18 -24
rslearn/models/pooling_decoder.py +22 -14
rslearn/models/presto/presto.py +16 -10
rslearn/models/presto/single_file_presto.py +4 -10
rslearn/models/prithvi.py +12 -8
rslearn/models/resize_features.py +21 -7
rslearn/models/sam2_enc.py +11 -9
rslearn/models/satlaspretrain.py +15 -9
rslearn/models/simple_time_series.py +37 -17
rslearn/models/singletask.py +24 -17
rslearn/models/ssl4eo_s12.py +15 -10
rslearn/models/swin.py +22 -13
rslearn/models/terramind.py +24 -7
rslearn/models/trunk.py +6 -3
rslearn/models/unet.py +18 -9
rslearn/models/upsample.py +22 -9
rslearn/train/all_patches_dataset.py +89 -37
rslearn/train/dataset.py +105 -97
rslearn/train/lightning_module.py +51 -32
rslearn/train/model_context.py +54 -0
rslearn/train/prediction_writer.py +111 -41
rslearn/train/scheduler.py +15 -0
rslearn/train/tasks/classification.py +34 -15
rslearn/train/tasks/detection.py +24 -31
rslearn/train/tasks/embedding.py +33 -29
rslearn/train/tasks/multi_task.py +7 -7
rslearn/train/tasks/per_pixel_regression.py +41 -19
rslearn/train/tasks/regression.py +38 -21
rslearn/train/tasks/segmentation.py +33 -15
rslearn/train/tasks/task.py +3 -2
rslearn/train/transforms/resize.py +74 -0
rslearn/utils/geometry.py +73 -0
rslearn/utils/jsonargparse.py +66 -0
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/METADATA +1 -1
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/RECORD +71 -66
rslearn/dataset/index.py +0 -173
rslearn/models/registry.py +0 -22
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/WHEEL +0 -0
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/entry_points.txt +0 -0
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/licenses/LICENSE +0 -0
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/licenses/NOTICE +0 -0
{rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/top_level.txt +0 -0

rslearn/dataset/storage/storage.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""Abstract classes for window metadata storage."""
+import abc
+from typing import TYPE_CHECKING
+from upath import UPath
+if TYPE_CHECKING:
+    from rslearn.dataset.window import Window, WindowLayerData
+class WindowStorage(abc.ABC):
+    """An abstract class for the storage backend for window metadata.
+    This is instantiated by a WindowStorageFactory for a specific rslearn dataset.
+    Window metadata includes the location and time range of windows (metadata.json),
+    the window layer datas (items.json), and the completed (materialized) layers. It
+    excludes the actual materialized data. All operations involving window metadata go
+    through the WindowStorage, including enumerating windows, creating new windows, and
+    updating window layer datas during `rslearn dataset prepare` or the completed
+    layers during `rslearn dataset materialize`.
+    """
+    @abc.abstractmethod
+    def get_window_root(self, group: str, name: str) -> UPath:
+        """Get the path where the window should be stored."""
+        raise NotImplementedError
+    @abc.abstractmethod
+    def get_windows(
+        self,
+        groups: list[str] | None = None,
+        names: list[str] | None = None,
+    ) -> list["Window"]:
+        """Load the windows in the dataset.
+        Args:
+            groups: an optional list of groups to filter loading
+            names: an optional list of window names to filter loading
+        """
+        raise NotImplementedError
+    @abc.abstractmethod
+    def create_or_update_window(self, window: "Window") -> None:
+        """Create or update the window.
+        An existing window is only updated if there is one with the same name and group.
+        If there is a window with the same name but a different group, the behavior is
+        undefined.
+        """
+        raise NotImplementedError
+    @abc.abstractmethod
+    def get_layer_datas(self, group: str, name: str) -> dict[str, "WindowLayerData"]:
+        """Get the window layer datas for the specified window.
+        Args:
+            group: the window group.
+            name: the window name.
+        Returns:
+            a dict mapping from the layer name to the layer data for that layer, if one
+                was previously saved.
+        """
+        raise NotImplementedError
+    @abc.abstractmethod
+    def save_layer_datas(
+        self, group: str, name: str, layer_datas: dict[str, "WindowLayerData"]
+    ) -> None:
+        """Set the window layer datas for the specified window."""
+        raise NotImplementedError
+    @abc.abstractmethod
+    def list_completed_layers(self, group: str, name: str) -> list[tuple[str, int]]:
+        """List the layers available for this window that are completed.
+        Args:
+            group: the window group.
+            name: the window name.
+        Returns:
+            a list of (layer_name, group_idx) completed layers.
+        """
+        raise NotImplementedError
+    @abc.abstractmethod
+    def is_layer_completed(
+        self, group: str, name: str, layer_name: str, group_idx: int = 0
+    ) -> bool:
+        """Check whether the specified layer is completed in the given window.
+        Completed means there is data in the layer and the data has been written
+        (materialized).
+        Args:
+            group: the window group.
+            name: the window name.
+            layer_name: the layer name.
+            group_idx: the index of the group within the layer.
+        Returns:
+            whether the layer is completed.
+        """
+        raise NotImplementedError
+    @abc.abstractmethod
+    def mark_layer_completed(
+        self, group: str, name: str, layer_name: str, group_idx: int = 0
+    ) -> None:
+        """Mark the specified layer completed for the given window.
+        This must be done after the contents of the layer have been written. If a layer
+        has multiple groups, the caller should wait until the contents of all groups
+        have been written before marking them completed; this is because, when
+        materializing a window, we skip materialization if the first group
+        (group_idx=0) is marked completed.
+        Args:
+            group: the window group.
+            name: the window name.
+            layer_name: the layer name.
+            group_idx: the index of the group within the layer.
+        """
+        raise NotImplementedError
+class WindowStorageFactory(abc.ABC):
+    """An abstract class for a configurable storage backend for window metadata.
+    The dataset config includes a StorageConfig that configures a WindowStorageFactory,
+    which in turn creates a WindowStorage given a dataset path.
+    """
+    @abc.abstractmethod
+    def get_storage(self, ds_path: UPath) -> WindowStorage:
+        """Get a WindowStorage for the given dataset path."""
+        raise NotImplementedError

rslearn/dataset/window.py CHANGED Viewed

@@ -1,20 +1,16 @@
 """rslearn windows."""
-import json
 from datetime import datetime
-from typing import TYPE_CHECKING, Any
+from typing import Any
 import shapely
 from upath import UPath
+from rslearn.dataset.storage.storage import WindowStorage
 from rslearn.log_utils import get_logger
 from rslearn.utils import Projection, STGeometry
-from rslearn.utils.fsspec import open_atomic
 from rslearn.utils.raster_format import get_bandset_dirname
-if TYPE_CHECKING:
-    from .index import DatasetIndex
 logger = get_logger(__name__)
 LAYERS_DIRECTORY_NAME = "layers"
@@ -138,14 +134,13 @@ class Window:
     def __init__(
         self,
-        path: UPath,
+        storage: WindowStorage,
         group: str,
         name: str,
         projection: Projection,
         bounds: tuple[int, int, int, int],
         time_range: tuple[datetime, datetime] | None,
         options: dict[str, Any] = {},
-        index: "DatasetIndex | None" = None,
     ) -> None:
         """Creates a new Window instance.
@@ -153,23 +148,21 @@ class Window:
         stored in metadata.json.
         Args:
-            path: the directory of this window
+            storage: the dataset storage for the underlying rslearn dataset.
             group: the group the window belongs to
             name: the unique name for this window
             projection: the projection of the window
             bounds: the bounds of the window in pixel coordinates
             time_range: optional time range of the window
             options: additional options (?)
-            index: DatasetIndex if it is available
         """
-        self.path = path
+        self.storage = storage
         self.group = group
         self.name = name
         self.projection = projection
         self.bounds = bounds
         self.time_range = time_range
         self.options = options
-        self.index = index
     def get_geometry(self) -> STGeometry:
         """Computes the STGeometry corresponding to this window."""
@@ -181,29 +174,11 @@ class Window:
     def load_layer_datas(self) -> dict[str, WindowLayerData]:
         """Load layer datas describing items in retrieved layers from items.json."""
-        # Load from index if it is available.
-        if self.index is not None:
-            layer_datas = self.index.layer_datas.get(self.name, [])
-        else:
-            items_fname = self.path / "items.json"
-            if not items_fname.exists():
-                return {}
-            with items_fname.open("r") as f:
-                layer_datas = [
-                    WindowLayerData.deserialize(layer_data)
-                    for layer_data in json.load(f)
-                ]
-        return {layer_data.layer_name: layer_data for layer_data in layer_datas}
+        return self.storage.get_layer_datas(self.group, self.name)
     def save_layer_datas(self, layer_datas: dict[str, WindowLayerData]) -> None:
         """Save layer datas to items.json."""
-        json_data = [layer_data.serialize() for layer_data in layer_datas.values()]
-        items_fname = self.path / "items.json"
-        logger.info(f"Saving window items to {items_fname}")
-        with open_atomic(items_fname, "w") as f:
-            json.dump(json_data, f)
+        self.storage.save_layer_datas(self.group, self.name, layer_datas)
     def list_completed_layers(self) -> list[tuple[str, int]]:
         """List the layers available for this window that are completed.
@@ -211,18 +186,7 @@ class Window:
         Returns:
             a list of (layer_name, group_idx) completed layers.
         """
-        layers_directory = self.path / LAYERS_DIRECTORY_NAME
-        if not layers_directory.exists():
-            return []
-        completed_layers = []
-        for layer_dir in layers_directory.iterdir():
-            layer_name, group_idx = get_layer_and_group_from_dir_name(layer_dir.name)
-            if not self.is_layer_completed(layer_name, group_idx):
-                continue
-            completed_layers.append((layer_name, group_idx))
-        return completed_layers
+        return self.storage.list_completed_layers(self.group, self.name)
     def get_layer_dir(self, layer_name: str, group_idx: int = 0) -> UPath:
         """Get the directory containing materialized data for the specified layer.
@@ -235,7 +199,9 @@ class Window:
         Returns:
             the path where data is or should be materialized.
         """
-        return get_window_layer_dir(self.path, layer_name, group_idx)
+        return get_window_layer_dir(
+            self.storage.get_window_root(self.group, self.name), layer_name, group_idx
+        )
     def is_layer_completed(self, layer_name: str, group_idx: int = 0) -> bool:
         """Check whether the specified layer is completed.
@@ -250,14 +216,9 @@ class Window:
         Returns:
             whether the layer is completed
         """
-        # Use the index to speed up the completed check if it is available.
-        if self.index is not None:
-            return (layer_name, group_idx) in self.index.completed_layers.get(
-                self.name, []
-            )
-        layer_dir = self.get_layer_dir(layer_name, group_idx)
-        return (layer_dir / "completed").exists()
+        return self.storage.is_layer_completed(
+            self.group, self.name, layer_name, group_idx
+        )
     def mark_layer_completed(self, layer_name: str, group_idx: int = 0) -> None:
         """Mark the specified layer completed.
@@ -272,8 +233,7 @@ class Window:
             layer_name: the layer name.
             group_idx: the index of the group within the layer.
         """
-        layer_dir = self.get_layer_dir(layer_name, group_idx)
-        (layer_dir / "completed").touch()
+        self.storage.mark_layer_completed(self.group, self.name, layer_name, group_idx)
     def get_raster_dir(
         self, layer_name: str, bands: list[str], group_idx: int = 0
@@ -289,7 +249,12 @@ class Window:
         Returns:
             the directory containing the raster.
         """
-        return get_window_raster_dir(self.path, layer_name, bands, group_idx)
+        return get_window_raster_dir(
+            self.storage.get_window_root(self.group, self.name),
+            layer_name,
+            bands,
+            group_idx,
+        )
     def get_metadata(self) -> dict[str, Any]:
         """Returns the window metadata dictionary."""
@@ -308,18 +273,14 @@ class Window:
     def save(self) -> None:
         """Save the window metadata to its root directory."""
-        self.path.mkdir(parents=True, exist_ok=True)
-        metadata_path = self.path / "metadata.json"
-        logger.debug(f"Saving window metadata to {metadata_path}")
-        with open_atomic(metadata_path, "w") as f:
-            json.dump(self.get_metadata(), f)
+        self.storage.create_or_update_window(self)
     @staticmethod
-    def from_metadata(path: UPath, metadata: dict[str, Any]) -> "Window":
-        """Create a Window from its path and metadata dictionary.
+    def from_metadata(storage: WindowStorage, metadata: dict[str, Any]) -> "Window":
+        """Create a Window from the WindowStorage and the window's metadata dictionary.
         Args:
-            path: the root directory of the window.
+            storage: the WindowStorage for the underlying dataset.
             metadata: the window metadata.
         Returns:
@@ -334,7 +295,7 @@ class Window:
         )
         return Window(
-            path=path,
+            storage=storage,
             group=metadata["group"],
             name=metadata["name"],
             projection=Projection.deserialize(metadata["projection"]),
@@ -350,21 +311,6 @@ class Window:
             options=metadata["options"],
         )
-    @staticmethod
-    def load(path: UPath) -> "Window":
-        """Load a Window from a UPath.
-        Args:
-            path: the root directory of the window
-        Returns:
-            the Window
-        """
-        metadata_fname = path / "metadata.json"
-        with metadata_fname.open("r") as f:
-            metadata = json.load(f)
-        return Window.from_metadata(path, metadata)
     @staticmethod
     def get_window_root(ds_path: UPath, group: str, name: str) -> UPath:
         """Gets the root directory of a window.

rslearn/lightning_cli.py CHANGED Viewed

@@ -21,6 +21,7 @@ from rslearn.log_utils import get_logger
 from rslearn.train.data_module import RslearnDataModule
 from rslearn.train.lightning_module import RslearnLightningModule
 from rslearn.utils.fsspec import open_atomic
+from rslearn.utils.jsonargparse import init_jsonargparse
 WANDB_ID_FNAME = "wandb_id"
@@ -390,8 +391,15 @@ class RslearnLightningCLI(LightningCLI):
         Sets the dataset path for any configured RslearnPredictionWriter callbacks.
         """
-        subcommand = self.config.subcommand
-        c = self.config[subcommand]
+        if not hasattr(self.config, "subcommand"):
+            logger.warning(
+                "Config does not have subcommand attribute, assuming we are in run=False mode"
+            )
+            subcommand = None
+            c = self.config
+        else:
+            subcommand = self.config.subcommand
+            c = self.config[subcommand]
         # If there is a RslearnPredictionWriter, set its path.
         prediction_writer_callback = None
@@ -415,16 +423,17 @@ class RslearnLightningCLI(LightningCLI):
         if subcommand == "predict":
             c.return_predictions = False
-        # For now we use DDP strategy with find_unused_parameters=True.
+        # Default to DDP with find_unused_parameters. Likely won't get called with unified config
         if subcommand == "fit":
-            c.trainer.strategy = jsonargparse.Namespace(
-                {
-                    "class_path": "lightning.pytorch.strategies.DDPStrategy",
-                    "init_args": jsonargparse.Namespace(
-                        {"find_unused_parameters": True}
-                    ),
-                }
-            )
+            if not c.trainer.strategy:
+                c.trainer.strategy = jsonargparse.Namespace(
+                    {
+                        "class_path": "lightning.pytorch.strategies.DDPStrategy",
+                        "init_args": jsonargparse.Namespace(
+                            {"find_unused_parameters": True}
+                        ),
+                    }
+                )
         if c.management_dir:
             self.enable_project_management(c.management_dir)
@@ -432,6 +441,8 @@ class RslearnLightningCLI(LightningCLI):
 def model_handler() -> None:
     """Handler for any rslearn model X commands."""
+    init_jsonargparse()
     RslearnLightningCLI(
         model_class=RslearnLightningModule,
         datamodule_class=RslearnDataModule,

rslearn/main.py CHANGED Viewed

@@ -27,13 +27,13 @@ from rslearn.dataset.handler_summaries import (
     PrepareDatasetWindowsSummary,
     UnknownIngestCounts,
 )
-from rslearn.dataset.index import DatasetIndex
 from rslearn.dataset.manage import (
     AttemptsCounter,
     materialize_dataset_windows,
     prepare_dataset_windows,
     retry,
 )
+from rslearn.dataset.storage.file import FileWindowStorage
 from rslearn.log_utils import get_logger
 from rslearn.tile_stores import get_tile_store_with_layer
 from rslearn.utils import Projection, STGeometry
@@ -315,7 +315,8 @@ def apply_on_windows(
         load_workers: optional different number of workers to use for loading the
             windows. If set, workers controls the number of workers to process the
             jobs, while load_workers controls the number of workers to use for reading
-            windows from the rslearn dataset.
+            windows from the rslearn dataset. Workers is only passed if the window
+            storage is FileWindowStorage.
         batch_size: if workers > 0, the maximum number of windows to pass to the
             function.
         jobs_per_process: optional, terminate processes after they have handled this
@@ -336,11 +337,14 @@ def apply_on_windows(
     else:
         groups = group
-    if load_workers is None:
-        load_workers = workers
-    windows = dataset.load_windows(
-        groups=groups, names=names, workers=load_workers, show_progress=True
-    )
+    # Load the windows. We pass workers and show_progress if it is FileWindowStorage.
+    kwargs: dict[str, Any] = {}
+    if isinstance(dataset.storage, FileWindowStorage):
+        if load_workers is None:
+            load_workers = workers
+        kwargs["workers"] = load_workers
+        kwargs["show_progress"] = True
+    windows = dataset.load_windows(groups=groups, names=names, **kwargs)
     logger.info(f"found {len(windows)} windows")
     if hasattr(f, "get_jobs"):
@@ -376,7 +380,7 @@ def apply_on_windows(
 def apply_on_windows_args(f: Callable[..., Any], args: argparse.Namespace) -> None:
     """Call apply_on_windows with arguments passed via command-line interface."""
-    dataset = Dataset(UPath(args.root), args.disabled_layers)
+    dataset = Dataset(UPath(args.root), disabled_layers=args.disabled_layers)
     apply_on_windows(
         f=f,
         dataset=dataset,
@@ -798,35 +802,6 @@ def dataset_materialize() -> None:
     apply_on_windows_args(fn, args)
-@register_handler("dataset", "build_index")
-def dataset_build_index() -> None:
-    """Handler for the rslearn dataset build_index command."""
-    parser = argparse.ArgumentParser(
-        prog="rslearn dataset build_index",
-        description=("rslearn dataset build_index: " + "create a dataset index file"),
-    )
-    parser.add_argument(
-        "--root",
-        type=str,
-        required=True,
-        help="Dataset path",
-    )
-    parser.add_argument(
-        "--workers",
-        type=int,
-        default=16,
-        help="Number of workers",
-    )
-    args = parser.parse_args(args=sys.argv[3:])
-    ds_path = UPath(args.root)
-    dataset = Dataset(ds_path)
-    index = DatasetIndex.build_index(
-        dataset=dataset,
-        workers=args.workers,
-    )
-    index.save_index(ds_path)
 @register_handler("model", "fit")
 def model_fit() -> None:
     """Handler for rslearn model fit."""

rslearn/models/anysat.py CHANGED Viewed

@@ -4,11 +4,13 @@ This code loads the AnySat model from torch hub. See
 https://github.com/gastruc/AnySat for applicable license and copyright information.
 """
-from typing import Any
 import torch
 from einops import rearrange
+from rslearn.train.model_context import ModelContext
+from .component import FeatureExtractor, FeatureMaps
 # AnySat github: https://github.com/gastruc/AnySat
 # Modalities and expected resolutions (meters)
 MODALITY_RESOLUTIONS: dict[str, float] = {
@@ -44,7 +46,7 @@ MODALITY_BANDS: dict[str, list[str]] = {
 TIME_SERIES_MODALITIES = {"s2", "s1-asc", "s1", "alos", "l7", "l8", "modis"}
-class AnySat(torch.nn.Module):
+class AnySat(FeatureExtractor):
     """AnySat backbone (outputs one feature map)."""
     def __init__(
@@ -117,17 +119,17 @@ class AnySat(torch.nn.Module):
         )
         self._embed_dim = 768  # base width, 'dense' returns 2x
-    def forward(self, inputs: list[dict[str, Any]]) -> list[torch.Tensor]:
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Forward pass for the AnySat model.
         Args:
-            inputs: input dicts that must include modalities as keys which are defined in the self.modalities list
+            context: the model context. Input dicts must include modalities as keys
+                which are defined in the self.modalities list
         Returns:
-            List[torch.Tensor]: Single-scale feature tensors from the encoder.
+            a FeatureMaps with one feature map at the configured patch size.
         """
-        if not inputs:
-            raise ValueError("empty inputs")
+        inputs = context.inputs
         batch: dict[str, torch.Tensor] = {}
         spatial_extent: tuple[float, float] | None = None
@@ -192,7 +194,7 @@ class AnySat(torch.nn.Module):
             kwargs["output_modality"] = self.output_modality
         features = self.model(batch, **kwargs)
-        return [rearrange(features, "b h w d -> b d h w")]
+        return FeatureMaps([rearrange(features, "b h w d -> b d h w")])
     def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.

rslearn 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

rslearn 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl