rslearn 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rslearn/config/dataset.py CHANGED
@@ -8,7 +8,6 @@ from typing import Any
 import numpy as np
 import numpy.typing as npt
 import pytimeparse
-import torch
 from rasterio.enums import Resampling
 
 from rslearn.utils import PixelBounds, Projection
@@ -49,15 +48,6 @@ class DType(Enum):
             return np.float32
         raise ValueError(f"unable to handle numpy dtype {self}")
 
-    def get_torch_dtype(self) -> torch.dtype:
-        """Returns pytorch dtype object corresponding to this DType."""
-        if self == DType.INT32:
-            return torch.int32
-        elif self == DType.FLOAT32:
-            return torch.float32
-        else:
-            raise ValueError(f"unable to handle torch dtype {self}")
-
 
 RESAMPLING_METHODS = {
     "nearest": Resampling.nearest,
@@ -125,7 +115,8 @@ class BandSetConfig:
         self,
         config_dict: dict[str, Any],
         dtype: DType,
-        bands: list[str],
+        bands: list[str] | None = None,
+        num_bands: int | None = None,
         format: dict[str, Any] | None = None,
         zoom_offset: int = 0,
         remap: dict[str, Any] | None = None,
@@ -137,7 +128,10 @@ class BandSetConfig:
         Args:
             config_dict: the config dict used to configure this BandSetConfig
             dtype: the pixel value type to store tiles in
-            bands: list of band names in this BandSetConfig
+            bands: list of band names in this BandSetConfig. One of bands or num_bands
+                must be set.
+            num_bands: the number of bands in this band set. The bands will be named
+                B00, B01, B02, etc.
             format: the format to store tiles in, defaults to geotiff
             zoom_offset: store images at a resolution higher or lower than the window
                 resolution. This enables keeping source data at its native resolution,
@@ -155,6 +149,14 @@ class BandSetConfig:
                 materialization when creating mosaics, to determine which parts of the
                 source images should be copied.
         """
+        if (bands is None and num_bands is None) or (
+            bands is not None and num_bands is not None
+        ):
+            raise ValueError("exactly one of bands and num_bands must be set")
+        if bands is None:
+            assert num_bands is not None
+            bands = [f"B{idx}" for idx in range(num_bands)]
+
         if class_names is not None and len(bands) != len(class_names):
             raise ValueError(
                 f"the number of class lists ({len(class_names)}) does not match the number of bands ({len(bands)})"
@@ -187,9 +189,16 @@ class BandSetConfig:
         kwargs = dict(
             config_dict=config,
             dtype=DType(config["dtype"]),
-            bands=config["bands"],
         )
-        for k in ["format", "zoom_offset", "remap", "class_names", "nodata_vals"]:
+        for k in [
+            "bands",
+            "num_bands",
+            "format",
+            "zoom_offset",
+            "remap",
+            "class_names",
+            "nodata_vals",
+        ]:
             if k in config:
                 kwargs[k] = config[k]
         return BandSetConfig(**kwargs)  # type: ignore
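
A minimal sketch (not part of the diff) of how the new num_bands argument might be used in place of an explicit band list, based on the constructor shown above; the empty config_dict is a placeholder.

# Hypothetical example: declare a band set by count; band names are then
# generated from the index as in __init__ above. Passing both (or neither)
# of bands/num_bands raises the ValueError added in this version.
band_set = BandSetConfig(
    config_dict={},
    dtype=DType.FLOAT32,
    num_bands=3,
)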
@@ -827,3 +827,55 @@ class Sentinel1(PlanetaryComputer):
             kwargs[k] = d[k]
 
         return Sentinel1(**kwargs)
+
+
+class Naip(PlanetaryComputer):
+    """A data source for NAIP data on Microsoft Planetary Computer.
+
+    See https://planetarycomputer.microsoft.com/dataset/naip.
+    """
+
+    COLLECTION_NAME = "naip"
+    ASSET_BANDS = {"image": ["R", "G", "B", "NIR"]}
+
+    def __init__(
+        self,
+        **kwargs: Any,
+    ):
+        """Initialize a new Naip instance.
+
+        Args:
+            band_names: list of bands to try to ingest.
+            kwargs: additional arguments to pass to PlanetaryComputer.
+        """
+        super().__init__(
+            collection_name=self.COLLECTION_NAME,
+            asset_bands=self.ASSET_BANDS,
+            **kwargs,
+        )
+
+    @staticmethod
+    def from_config(config: RasterLayerConfig, ds_path: UPath) -> "Naip":
+        """Creates a new Naip instance from a configuration dictionary."""
+        if config.data_source is None:
+            raise ValueError("config.data_source is required")
+        d = config.data_source.config_dict
+        kwargs = {}
+
+        if "timeout_seconds" in d:
+            kwargs["timeout"] = timedelta(seconds=d["timeout_seconds"])
+
+        if "cache_dir" in d:
+            kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
+
+        simple_optionals = [
+            "query",
+            "sort_by",
+            "sort_ascending",
+            "max_items_per_client",
+        ]
+        for k in simple_optionals:
+            if k in d:
+                kwargs[k] = d[k]
+
+        return Naip(**kwargs)
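
A hedged usage sketch (not from the diff): constructing the new Naip source directly, assuming PlanetaryComputer accepts the timeout keyword argument that Naip.from_config forwards above.

from datetime import timedelta

# Hypothetical example; the keyword name is taken from the kwargs that
# Naip.from_config builds, not from a documented PlanetaryComputer signature.
naip = Naip(timeout=timedelta(seconds=30))
# NAIP imagery is exposed as a single "image" asset with bands R, G, B, NIR.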
@@ -20,6 +20,7 @@ class LayerPrepareSummary:
     # Counts
     windows_prepared: int
     windows_skipped: int
+    windows_rejected: int
    get_items_attempts: int
 
 
rslearn/dataset/manage.py CHANGED
@@ -118,6 +118,7 @@ def prepare_dataset_windows(
                 duration_seconds=time.monotonic() - layer_start_time,
                 windows_prepared=0,
                 windows_skipped=len(windows),
+                windows_rejected=0,
                 get_items_attempts=0,
             )
         )
@@ -141,6 +142,7 @@
                 duration_seconds=time.monotonic() - layer_start_time,
                 windows_prepared=0,
                 windows_skipped=len(windows),
+                windows_rejected=0,
                 get_items_attempts=0,
             )
         )
@@ -181,6 +183,9 @@
         attempts_counter=attempts_counter,
     )
 
+    windows_prepared = 0
+    windows_rejected = 0
+    min_matches = data_source_cfg.query_config.min_matches
     for window, result in zip(needed_windows, results):
         layer_datas = window.load_layer_datas()
         layer_datas[layer_name] = WindowLayerData(
@@ -191,13 +196,22 @@
         )
         window.save_layer_datas(layer_datas)
 
+        # If result is empty and min_matches > 0, window was rejected due to min_matches
+        if len(result) == 0 and min_matches > 0:
+            windows_rejected += 1
+        else:
+            windows_prepared += 1
+
+    windows_skipped = len(windows) - len(needed_windows)
+
     layer_summaries.append(
         LayerPrepareSummary(
             layer_name=layer_name,
             data_source_name=data_source_cfg.name,
             duration_seconds=time.monotonic() - layer_start_time,
-            windows_prepared=len(needed_windows),  # we assume all have succeeded
-            windows_skipped=len(windows) - len(needed_windows),
+            windows_prepared=windows_prepared,
+            windows_skipped=windows_skipped,
+            windows_rejected=windows_rejected,
             get_items_attempts=attempts_counter.value,
         )
     )
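
An illustrative sketch of how the new counters classify windows, mirroring the logic added above; the results list here is made up.

# Hypothetical data: one window matched nothing, two matched items.
results = [[], ["item_a"], ["item_b", "item_c"]]
min_matches = 1

windows_rejected = sum(1 for r in results if len(r) == 0 and min_matches > 0)
windows_prepared = len(results) - windows_rejected
assert (windows_rejected, windows_prepared) == (1, 2)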
@@ -0,0 +1,67 @@
+"""LightningCLI for rslearn."""
+
+import sys
+
+from lightning.pytorch.cli import LightningArgumentParser, LightningCLI
+
+from rslearn.arg_parser import RslearnArgumentParser
+from rslearn.train.data_module import RslearnDataModule
+from rslearn.train.lightning_module import RslearnLightningModule
+
+
+class RslearnLightningCLI(LightningCLI):
+    """LightningCLI that links data.tasks to model.tasks and supports environment variables."""
+
+    def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None:
+        """Link data.tasks to model.tasks.
+
+        Args:
+            parser: the argument parser
+        """
+        # Link data.tasks to model.tasks
+        parser.link_arguments(
+            "data.init_args.task", "model.init_args.task", apply_on="instantiate"
+        )
+
+    def before_instantiate_classes(self) -> None:
+        """Called before Lightning class initialization.
+
+        Sets the dataset path for any configured RslearnPredictionWriter callbacks.
+        """
+        subcommand = self.config.subcommand
+        c = self.config[subcommand]
+
+        # If there is a RslearnPredictionWriter, set its path.
+        prediction_writer_callback = None
+        if "callbacks" in c.trainer:
+            for existing_callback in c.trainer.callbacks:
+                if (
+                    existing_callback.class_path
+                    == "rslearn.train.prediction_writer.RslearnWriter"
+                ):
+                    prediction_writer_callback = existing_callback
+        if prediction_writer_callback:
+            prediction_writer_callback.init_args.path = c.data.init_args.path
+
+        # Disable the sampler replacement, since the rslearn data module will set the
+        # sampler as needed.
+        c.trainer.use_distributed_sampler = False
+
+        # For predict, make sure that return_predictions is False.
+        # Otherwise all the predictions would be stored in memory which can lead to
+        # high memory consumption.
+        if subcommand == "predict":
+            c.return_predictions = False
+
+
+def model_handler() -> None:
+    """Handler for any rslearn model X commands."""
+    RslearnLightningCLI(
+        model_class=RslearnLightningModule,
+        datamodule_class=RslearnDataModule,
+        args=sys.argv[2:],
+        subclass_mode_model=True,
+        subclass_mode_data=True,
+        save_config_kwargs={"overwrite": True},
+        parser_class=RslearnArgumentParser,
+    )
rslearn/main.py CHANGED
@@ -10,11 +10,9 @@ from datetime import UTC, datetime, timedelta
 from typing import Any, TypeVar
 
 import tqdm
-from lightning.pytorch.cli import LightningArgumentParser, LightningCLI
 from rasterio.crs import CRS
 from upath import UPath
 
-from rslearn.arg_parser import RslearnArgumentParser
 from rslearn.config import LayerConfig
 from rslearn.const import WGS84_EPSG
 from rslearn.data_sources import Item, data_source_from_config
@@ -38,8 +36,6 @@ from rslearn.dataset.manage import (
 )
 from rslearn.log_utils import get_logger
 from rslearn.tile_stores import get_tile_store_with_layer
-from rslearn.train.data_module import RslearnDataModule
-from rslearn.train.lightning_module import RslearnLightningModule
 from rslearn.utils import Projection, STGeometry
 
 logger = get_logger(__name__)
@@ -831,85 +827,35 @@ def dataset_build_index() -> None:
     index.save_index(ds_path)
 
 
-class RslearnLightningCLI(LightningCLI):
-    """LightningCLI that links data.tasks to model.tasks and supports environment variables."""
-
-    def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None:
-        """Link data.tasks to model.tasks.
-
-        Args:
-            parser: the argument parser
-        """
-        # Link data.tasks to model.tasks
-        parser.link_arguments(
-            "data.init_args.task", "model.init_args.task", apply_on="instantiate"
-        )
-
-    def before_instantiate_classes(self) -> None:
-        """Called before Lightning class initialization.
-
-        Sets the dataset path for any configured RslearnPredictionWriter callbacks.
-        """
-        subcommand = self.config.subcommand
-        c = self.config[subcommand]
-
-        # If there is a RslearnPredictionWriter, set its path.
-        prediction_writer_callback = None
-        if "callbacks" in c.trainer:
-            for existing_callback in c.trainer.callbacks:
-                if (
-                    existing_callback.class_path
-                    == "rslearn.train.prediction_writer.RslearnWriter"
-                ):
-                    prediction_writer_callback = existing_callback
-        if prediction_writer_callback:
-            prediction_writer_callback.init_args.path = c.data.init_args.path
-
-        # Disable the sampler replacement, since the rslearn data module will set the
-        # sampler as needed.
-        c.trainer.use_distributed_sampler = False
-
-        # For predict, make sure that return_predictions is False.
-        # Otherwise all the predictions would be stored in memory which can lead to
-        # high memory consumption.
-        if subcommand == "predict":
-            c.return_predictions = False
-
-
-def model_handler() -> None:
-    """Handler for any rslearn model X commands."""
-    RslearnLightningCLI(
-        model_class=RslearnLightningModule,
-        datamodule_class=RslearnDataModule,
-        args=sys.argv[2:],
-        subclass_mode_model=True,
-        subclass_mode_data=True,
-        save_config_kwargs={"overwrite": True},
-        parser_class=RslearnArgumentParser,
-    )
-
-
 @register_handler("model", "fit")
 def model_fit() -> None:
     """Handler for rslearn model fit."""
+    from .lightning_cli import model_handler
+
     model_handler()
 
 
 @register_handler("model", "validate")
 def model_validate() -> None:
     """Handler for rslearn model validate."""
+    from .lightning_cli import model_handler
+
     model_handler()
 
 
 @register_handler("model", "test")
 def model_test() -> None:
     """Handler for rslearn model test."""
+    from .lightning_cli import model_handler
+
     model_handler()
 
 
 @register_handler("model", "predict")
 def model_predict() -> None:
     """Handler for rslearn model predict."""
+    from .lightning_cli import model_handler
+
     model_handler()
 
 
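The handlers now import model_handler lazily, so dataset commands no longer pay the cost of importing Lightning. A rough sketch of what the wrapper ends up doing for a fit command, assuming the new module lives at rslearn.lightning_cli (inferred from the relative import above), that an rslearn console script populates sys.argv, and with a made-up config filename.

import sys

# Hypothetical invocation; in practice the rslearn CLI dispatches to
# model_handler(), which hands sys.argv[2:] ("fit --config ...") to
# RslearnLightningCLI.
sys.argv = ["rslearn", "model", "fit", "--config", "experiment.yaml"]

from rslearn.lightning_cli import model_handler

model_handler()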
@@ -40,6 +40,7 @@ EMBEDDING_SIZES = {
     ModelID.OLMOEARTH_V1_NANO: 128,
     ModelID.OLMOEARTH_V1_TINY: 192,
     ModelID.OLMOEARTH_V1_BASE: 768,
+    ModelID.OLMOEARTH_V1_LARGE: 1024,
 }
 
 
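
For reference, a trivial sketch of looking up the newly registered size; the module that defines ModelID and EMBEDDING_SIZES is not named in this diff, so the import is left out.

# ModelID and EMBEDDING_SIZES come from the (unnamed) module this hunk edits.
assert EMBEDDING_SIZES[ModelID.OLMOEARTH_V1_LARGE] == 1024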