rslearn 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. rslearn/data_sources/__init__.py +2 -0
  2. rslearn/data_sources/aws_landsat.py +44 -161
  3. rslearn/data_sources/aws_open_data.py +2 -4
  4. rslearn/data_sources/aws_sentinel1.py +1 -3
  5. rslearn/data_sources/aws_sentinel2_element84.py +54 -165
  6. rslearn/data_sources/climate_data_store.py +1 -3
  7. rslearn/data_sources/copernicus.py +1 -2
  8. rslearn/data_sources/data_source.py +1 -1
  9. rslearn/data_sources/direct_materialize_data_source.py +336 -0
  10. rslearn/data_sources/earthdaily.py +52 -155
  11. rslearn/data_sources/earthdatahub.py +425 -0
  12. rslearn/data_sources/eurocrops.py +1 -2
  13. rslearn/data_sources/gcp_public_data.py +1 -2
  14. rslearn/data_sources/google_earth_engine.py +1 -2
  15. rslearn/data_sources/hf_srtm.py +595 -0
  16. rslearn/data_sources/local_files.py +1 -1
  17. rslearn/data_sources/openstreetmap.py +1 -1
  18. rslearn/data_sources/planet.py +1 -2
  19. rslearn/data_sources/planet_basemap.py +1 -2
  20. rslearn/data_sources/planetary_computer.py +183 -186
  21. rslearn/data_sources/soilgrids.py +3 -3
  22. rslearn/data_sources/stac.py +1 -2
  23. rslearn/data_sources/usda_cdl.py +1 -3
  24. rslearn/data_sources/usgs_landsat.py +7 -254
  25. rslearn/data_sources/worldcereal.py +1 -1
  26. rslearn/data_sources/worldcover.py +1 -1
  27. rslearn/data_sources/worldpop.py +1 -1
  28. rslearn/data_sources/xyz_tiles.py +5 -9
  29. rslearn/dataset/storage/file.py +16 -12
  30. rslearn/models/concatenate_features.py +6 -1
  31. rslearn/tile_stores/default.py +4 -2
  32. rslearn/train/{all_patches_dataset.py → all_crops_dataset.py} +120 -117
  33. rslearn/train/data_module.py +36 -33
  34. rslearn/train/dataset.py +159 -68
  35. rslearn/train/lightning_module.py +60 -4
  36. rslearn/train/metrics.py +162 -0
  37. rslearn/train/model_context.py +3 -3
  38. rslearn/train/prediction_writer.py +69 -41
  39. rslearn/train/tasks/classification.py +14 -1
  40. rslearn/train/tasks/detection.py +5 -5
  41. rslearn/train/tasks/per_pixel_regression.py +19 -6
  42. rslearn/train/tasks/regression.py +19 -3
  43. rslearn/train/tasks/segmentation.py +17 -0
  44. rslearn/utils/__init__.py +2 -0
  45. rslearn/utils/fsspec.py +51 -1
  46. rslearn/utils/geometry.py +21 -0
  47. rslearn/utils/m2m_api.py +251 -0
  48. rslearn/utils/retry_session.py +43 -0
  49. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/METADATA +6 -3
  50. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/RECORD +55 -50
  51. rslearn/data_sources/earthdata_srtm.py +0 -282
  52. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/WHEEL +0 -0
  53. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/entry_points.txt +0 -0
  54. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/licenses/LICENSE +0 -0
  55. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/licenses/NOTICE +0 -0
  56. {rslearn-0.0.26.dist-info → rslearn-0.0.28.dist-info}/top_level.txt +0 -0
rslearn/data_sources/usgs_landsat.py
@@ -4,12 +4,9 @@
  """
  
  import io
- import json
  import os
  import shutil
  import tempfile
- import time
- import uuid
  from collections.abc import Generator
  from datetime import UTC, datetime, timedelta
  from typing import Any, BinaryIO
@@ -24,246 +21,7 @@ from rslearn.data_sources import DataSource, DataSourceContext, Item
  from rslearn.data_sources.utils import match_candidate_items_to_window
  from rslearn.tile_stores import TileStoreWithLayer
  from rslearn.utils import STGeometry
- 
- 
- class APIException(Exception):
-     """Exception raised for M2M API errors."""
- 
-     pass
- 
- 
- class M2MAPIClient:
-     """An API client for interacting with the USGS M2M API."""
- 
-     api_url = "https://m2m.cr.usgs.gov/api/api/json/stable/"
-     pagination_size = 1000
- 
-     def __init__(
-         self,
-         username: str,
-         password: str | None = None,
-         token: str | None = None,
-         timeout: timedelta = timedelta(seconds=120),
-     ) -> None:
-         """Initialize a new M2MAPIClient.
- 
-         Args:
-             username: the EROS username
-             password: the EROS password
-             token: the application token. One of password or token must be specified.
-             timeout: timeout for requests.
-         """
-         self.username = username
-         self.timeout = timeout
- 
-         if password is not None and token is not None:
-             raise ValueError("only one of password or token can be specified")
- 
-         if password is not None:
-             json_data = json.dumps({"username": self.username, "password": password})
-             response = requests.post(
-                 self.api_url + "login",
-                 data=json_data,
-                 timeout=self.timeout.total_seconds(),
-             )
- 
-         elif token is not None:
-             json_data = json.dumps({"username": username, "token": token})
-             response = requests.post(
-                 self.api_url + "login-token",
-                 data=json_data,
-                 timeout=self.timeout.total_seconds(),
-             )
- 
-         else:
-             raise ValueError("one of password or token must be specified")
- 
-         response.raise_for_status()
-         self.auth_token = response.json()["data"]
- 
-     def request(
-         self, endpoint: str, data: dict[str, Any] | None = None
-     ) -> dict[str, Any] | None:
-         """Make a request to the API.
- 
-         Args:
-             endpoint: the endpoint to call
-             data: POST data to pass
- 
-         Returns:
-             JSON response data if any
-         """
-         response = requests.post(
-             self.api_url + endpoint,
-             headers={"X-Auth-Token": self.auth_token},
-             data=json.dumps(data),
-             timeout=self.timeout.total_seconds(),
-         )
-         response.raise_for_status()
-         if response.text:
-             response_dict = response.json()
- 
-             if response_dict["errorMessage"]:
-                 raise APIException(response_dict["errorMessage"])
-             return response_dict
-         return None
- 
-     def close(self) -> None:
-         """Logout from the API."""
-         self.request("logout")
- 
-     def __enter__(self) -> "M2MAPIClient":
-         """Enter function to provide with semantics."""
-         return self
- 
-     def __exit__(self) -> None:
-         """Exit function to provide with semantics.
- 
-         Logs out the API.
-         """
-         self.close()
- 
-     def get_filters(self, dataset_name: str) -> list[dict[str, Any]]:
-         """Returns filters available for the given dataset.
- 
-         Args:
-             dataset_name: the dataset name e.g. landsat_ot_c2_l1
- 
-         Returns:
-             list of filter objects
-         """
-         response_dict = self.request("dataset-filters", {"datasetName": dataset_name})
-         if response_dict is None:
-             raise APIException("No response from API")
-         return response_dict["data"]
- 
-     def scene_search(
-         self,
-         dataset_name: str,
-         acquisition_time_range: tuple[datetime, datetime] | None = None,
-         cloud_cover_range: tuple[int, int] | None = None,
-         bbox: tuple[float, float, float, float] | None = None,
-         metadata_filter: dict[str, Any] | None = None,
-     ) -> list[dict[str, Any]]:
-         """Search for scenes matching the arguments.
- 
-         Args:
-             dataset_name: the dataset name e.g. landsat_ot_c2_l1
-             acquisition_time_range: optional filter on the acquisition time
-             cloud_cover_range: optional filter on the cloud cover
-             bbox: optional spatial filter
-             metadata_filter: optional metadata filter dict
-         """
-         base_data: dict[str, Any] = {"datasetName": dataset_name, "sceneFilter": {}}
-         if acquisition_time_range:
-             base_data["sceneFilter"]["acquisitionFilter"] = {
-                 "start": acquisition_time_range[0].isoformat(),
-                 "end": acquisition_time_range[1].isoformat(),
-             }
-         if cloud_cover_range:
-             base_data["sceneFilter"]["cloudCoverFilter"] = {
-                 "min": cloud_cover_range[0],
-                 "max": cloud_cover_range[1],
-                 "includeUnknown": False,
-             }
-         if bbox:
-             base_data["sceneFilter"]["spatialFilter"] = {
-                 "filterType": "mbr",
-                 "lowerLeft": {"longitude": bbox[0], "latitude": bbox[1]},
-                 "upperRight": {"longitude": bbox[2], "latitude": bbox[3]},
-             }
-         if metadata_filter:
-             base_data["sceneFilter"]["metadataFilter"] = metadata_filter
- 
-         starting_number = 1
-         results = []
-         while True:
-             cur_data = base_data.copy()
-             cur_data["startingNumber"] = starting_number
-             cur_data["maxResults"] = self.pagination_size
-             response_dict = self.request("scene-search", cur_data)
-             if response_dict is None:
-                 raise APIException("No response from API")
-             data = response_dict["data"]
-             results.extend(data["results"])
-             if data["recordsReturned"] < self.pagination_size:
-                 break
-             starting_number += self.pagination_size
- 
-         return results
- 
-     def get_scene_metadata(self, dataset_name: str, entity_id: str) -> dict[str, Any]:
-         """Get detailed metadata for a scene.
- 
-         Args:
-             dataset_name: the dataset name in which to search
-             entity_id: the entity ID of the scene
- 
-         Returns:
-             full scene metadata
-         """
-         response_dict = self.request(
-             "scene-metadata",
-             {
-                 "datasetName": dataset_name,
-                 "entityId": entity_id,
-                 "metadataType": "full",
-             },
-         )
-         if response_dict is None:
-             raise APIException("No response from API")
-         return response_dict["data"]
- 
-     def get_downloadable_products(
-         self, dataset_name: str, entity_id: str
-     ) -> list[dict[str, Any]]:
-         """Get the downloadable products for a given scene.
- 
-         Args:
-             dataset_name: the dataset name
-             entity_id: the entity ID of the scene
- 
-         Returns:
-             list of downloadable products
-         """
-         data = {"datasetName": dataset_name, "entityIds": [entity_id]}
-         response_dict = self.request("download-options", data)
-         if response_dict is None:
-             raise APIException("No response from API")
-         return response_dict["data"]
- 
-     def get_download_url(self, entity_id: str, product_id: str) -> str:
-         """Get the download URL for a given product.
- 
-         Args:
-             entity_id: the entity ID of the product
-             product_id: the product ID of the product
- 
-         Returns:
-             the download URL
-         """
-         label = str(uuid.uuid4())
-         data = {
-             "downloads": [
-                 {"label": label, "entityId": entity_id, "productId": product_id}
-             ]
-         }
-         response_dict = self.request("download-request", data)
-         if response_dict is None:
-             raise APIException("No response from API")
-         response = response_dict["data"]
-         while True:
-             response_dict = self.request("download-retrieve", {"label": label})
-             if response_dict is None:
-                 raise APIException("No response from API")
-             response = response_dict["data"]
-             if len(response["available"]) > 0:
-                 return response["available"][0]["url"]
-             if len(response["requested"]) == 0:
-                 raise Exception("Did not get download URL")
-             if response["requested"][0].get("url"):
-                 return response["requested"][0]["url"]
-             time.sleep(10)
+ from rslearn.utils.m2m_api import APIException, M2MAPIClient
  
  
  class LandsatOliTirsItem(Item):
@@ -314,30 +72,26 @@ class LandsatOliTirs(DataSource):
  
      def __init__(
          self,
-         username: str,
-         sort_by: str | None = None,
-         password: str | None = None,
+         username: str | None = None,
          token: str | None = None,
+         sort_by: str | None = None,
          timeout: timedelta = timedelta(seconds=10),
          context: DataSourceContext = DataSourceContext(),
      ):
          """Initialize a new LandsatOliTirs instance.
  
          Args:
-             username: EROS username
+             username: EROS username (see M2MAPIClient).
+             token: EROS application token (see M2MAPIClient).
              sort_by: can be "cloud_cover", default arbitrary order; only has effect for
                  SpaceMode.WITHIN.
-             password: EROS password (see M2MAPIClient).
-             token: EROS application token (see M2MAPIClient).
              timeout: timeout for requests.
              context: the data source context.
          """
          self.sort_by = sort_by
          self.timeout = timeout
  
-         self.client = M2MAPIClient(
-             username, password=password, token=token, timeout=timeout
-         )
+         self.client = M2MAPIClient(username=username, token=token, timeout=timeout)
  
      def _scene_metadata_to_item(self, result: dict[str, Any]) -> LandsatOliTirsItem:
          """Convert scene metadata from the API to a LandsatOliTirsItem."""
@@ -429,9 +183,8 @@ class LandsatOliTirs(DataSource):
          )
          return self._scene_metadata_to_item(scene_metadata)
  
-     def deserialize_item(self, serialized_item: Any) -> Item:
+     def deserialize_item(self, serialized_item: dict) -> Item:
          """Deserializes an item from JSON-decoded data."""
-         assert isinstance(serialized_item, dict)
          return LandsatOliTirsItem.deserialize(serialized_item)
  
      def _get_download_urls(self, item: Item) -> dict[str, tuple[str, str]]:
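
Taken together, the four hunks above move the M2M client out of usgs_landsat.py into the new rslearn/utils/m2m_api.py module and drop password-based login in favor of application tokens. A minimal usage sketch of the relocated client, assuming it keeps the method names visible in the removed code (the shipped version in m2m_api.py may differ in detail):

from datetime import UTC, datetime, timedelta

from rslearn.utils.m2m_api import APIException, M2MAPIClient

# Token-based login, mirroring the new LandsatOliTirs call site.
client = M2MAPIClient(
    username="my_eros_user",  # placeholder credentials
    token="my_app_token",
    timeout=timedelta(seconds=120),
)
try:
    # One month of scenes over a (lon_min, lat_min, lon_max, lat_max) box
    # with at most 20% cloud cover.
    results = client.scene_search(
        "landsat_ot_c2_l1",
        acquisition_time_range=(
            datetime(2024, 6, 1, tzinfo=UTC),
            datetime(2024, 7, 1, tzinfo=UTC),
        ),
        cloud_cover_range=(0, 20),
        bbox=(-122.5, 47.4, -122.2, 47.7),
    )
    for scene in results:
        # "entityId" is the scene identifier that the other client methods accept.
        metadata = client.get_scene_metadata("landsat_ot_c2_l1", scene["entityId"])
except APIException as e:
    print(f"M2M API error: {e}")
finally:
    client.close()  # logs out of the API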
rslearn/data_sources/worldcereal.py
@@ -291,7 +291,7 @@ class WorldCereal(LocalFiles):
              raise ValueError(f"No AEZ files found for {self.band}")
  
          super().__init__(
-             src_dir=tif_dir,
+             src_dir=tif_dir.absolute().as_uri(),
              raster_item_specs=item_specs,
              layer_type=LayerType.RASTER,
              context=context,
rslearn/data_sources/worldcover.py
@@ -75,7 +75,7 @@ class WorldCover(LocalFiles):
          tif_dir = self.download_worldcover_data(worldcover_upath)
  
          super().__init__(
-             src_dir=tif_dir,
+             src_dir=tif_dir.absolute().as_uri(),
              layer_type=LayerType.RASTER,
              context=context,
          )
rslearn/data_sources/worldpop.py
@@ -80,7 +80,7 @@ class WorldPop(LocalFiles):
          worldpop_upath.mkdir(parents=True, exist_ok=True)
          self.download_worldpop_data(worldpop_upath, timeout)
          super().__init__(
-             src_dir=worldpop_upath,
+             src_dir=worldpop_upath.absolute().as_uri(),
              layer_type=LayerType.RASTER,
              context=context,
          )
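
The three LocalFiles subclasses above (WorldCereal, WorldCover, WorldPop) now pass src_dir as a file:// URI string rather than a path object. For reference, here is what as_uri() produces, shown with plain pathlib (the diff itself calls the same methods on UPath objects):

from pathlib import Path

# as_uri() requires an absolute path, hence the .absolute() call in the diff.
print(Path("data/worldpop").absolute().as_uri())
# e.g. file:///home/user/data/worldpop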
rslearn/data_sources/xyz_tiles.py
@@ -19,7 +19,7 @@ from rslearn.config import LayerConfig, QueryConfig
  from rslearn.dataset import Window
  from rslearn.dataset.materialize import RasterMaterializer
  from rslearn.tile_stores import TileStore, TileStoreWithLayer
- from rslearn.utils import PixelBounds, Projection, STGeometry
+ from rslearn.utils import PixelBounds, Projection, STGeometry, get_global_raster_bounds
  from rslearn.utils.array import copy_spatial_array
  from rslearn.utils.raster_format import get_transform_from_projection_and_bounds
  
@@ -184,7 +184,7 @@ class XyzTiles(DataSource, TileStore):
              groups.append(cur_groups)
          return groups
  
-     def deserialize_item(self, serialized_item: Any) -> Item:
+     def deserialize_item(self, serialized_item: dict) -> Item:
          """Deserializes an item from JSON-decoded data."""
          return Item.deserialize(serialized_item)
  
@@ -278,13 +278,9 @@ class XyzTiles(DataSource, TileStore):
          Returns:
              the bounds of the raster in the projection.
          """
-         geom = STGeometry(self.projection, self.shp, None).to_projection(projection)
-         return (
-             int(geom.shp.bounds[0]),
-             int(geom.shp.bounds[1]),
-             int(geom.shp.bounds[2]),
-             int(geom.shp.bounds[3]),
-         )
+         # XyzTiles is a global data source, so we return global raster bounds based on
+         # the projection.
+         return get_global_raster_bounds(projection)
  
      def read_raster(
          self,
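
get_global_raster_bounds is newly exported from rslearn.utils (the implementation lands in rslearn/utils/geometry.py, +21 lines, and is not shown in this diff). Conceptually it replaces the inline computation removed above: project a world-spanning geometry into the target projection and take integer pixel bounds. A hypothetical sketch of that idea, not the shipped code; the +/-85.06 latitude clamp is an assumption to keep Mercator-like projections finite:

import shapely

from rslearn.const import WGS84_PROJECTION
from rslearn.utils import Projection, STGeometry

def global_raster_bounds_sketch(projection: Projection) -> tuple[int, int, int, int]:
    # Project a near-global WGS84 box into the target projection and take its
    # integer bounds, following the pattern of the removed code above.
    world = shapely.box(-180, -85.06, 180, 85.06)
    geom = STGeometry(WGS84_PROJECTION, world, None).to_projection(projection)
    b = geom.shp.bounds
    return (int(b[0]), int(b[1]), int(b[2]), int(b[3]))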
rslearn/dataset/storage/file.py
@@ -15,7 +15,7 @@ from rslearn.dataset.window import (
      get_window_layer_dir,
  )
  from rslearn.log_utils import get_logger
- from rslearn.utils.fsspec import open_atomic
+ from rslearn.utils.fsspec import iter_nonhidden_subdirs, open_atomic
  from rslearn.utils.mp import star_imap_unordered
  
  from .storage import WindowStorage, WindowStorageFactory
@@ -77,8 +77,8 @@ class FileWindowStorage(WindowStorage):
          window_dirs = []
          if not groups:
              groups = []
-             for p in (self.path / "windows").iterdir():
-                 groups.append(p.name)
+             for group_dir in iter_nonhidden_subdirs(self.path / "windows"):
+                 groups.append(group_dir.name)
          for group in groups:
              group_dir = self.path / "windows" / group
              if not group_dir.exists():
@@ -86,16 +86,20 @@ class FileWindowStorage(WindowStorage):
                      f"Skipping group directory {group_dir} since it does not exist"
                  )
                  continue
+             if not group_dir.is_dir():
+                 logger.warning(
+                     f"Skipping group path {group_dir} since it is not a directory"
+                 )
+                 continue
              if names:
-                 cur_names = names
+                 for window_name in names:
+                     window_dir = group_dir / window_name
+                     if not window_dir.is_dir():
+                         continue
+                     window_dirs.append(window_dir)
              else:
-                 cur_names = []
-                 for p in group_dir.iterdir():
-                     cur_names.append(p.name)
- 
-             for window_name in cur_names:
-                 window_dir = group_dir / window_name
-                 window_dirs.append(window_dir)
+                 for window_dir in iter_nonhidden_subdirs(group_dir):
+                     window_dirs.append(window_dir)
  
          if workers == 0:
              windows = [load_window(self, window_dir) for window_dir in window_dirs]
@@ -162,7 +166,7 @@ class FileWindowStorage(WindowStorage):
              return []
  
          completed_layers = []
-         for layer_dir in layers_directory.iterdir():
+         for layer_dir in iter_nonhidden_subdirs(layers_directory):
              layer_name, group_idx = get_layer_and_group_from_dir_name(layer_dir.name)
              if not self.is_layer_completed(group, name, layer_name, group_idx):
                  continue
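
The file.py hunks above (and the tile_stores/default.py hunks below) replace direct iterdir() calls with new helpers from rslearn.utils.fsspec that skip hidden entries. The real implementations are part of the +51 lines in fsspec.py and are not shown in this diff; a hypothetical re-creation of the assumed behavior:

from collections.abc import Generator

from upath import UPath

def iter_nonhidden_subdirs(path: UPath) -> Generator[UPath, None, None]:
    # Assumed semantics: yield child directories whose names do not start with ".".
    for p in path.iterdir():
        if not p.name.startswith(".") and p.is_dir():
            yield p

def iter_nonhidden_files(path: UPath) -> Generator[UPath, None, None]:
    # Assumed semantics: yield child files whose names do not start with ".".
    for p in path.iterdir():
        if not p.name.startswith(".") and p.is_file():
            yield p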
rslearn/models/concatenate_features.py
@@ -3,6 +3,7 @@
  from typing import Any
  
  import torch
+ from einops import rearrange
  
  from rslearn.train.model_context import ModelContext
  
@@ -79,7 +80,11 @@ class ConcatenateFeatures(IntermediateComponent):
          )
  
          add_data = torch.stack(
-             [input_data[self.key] for input_data in context.inputs], dim=0
+             [
+                 rearrange(input_data[self.key].image, "c t h w -> (c t) h w")
+                 for input_data in context.inputs
+             ],
+             dim=0,
          )
          add_features = self.conv_layers(add_data)
  
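
The stacked inputs are now (channel, time, height, width) image tensors, and rearrange folds the time axis into the channel axis before the conv layers. The einops pattern is equivalent to a reshape:

import torch
from einops import rearrange

x = torch.randn(4, 2, 32, 32)  # (c, t, h, w)
y = rearrange(x, "c t h w -> (c t) h w")
print(y.shape)  # torch.Size([8, 32, 32]), same as x.reshape(4 * 2, 32, 32)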
rslearn/tile_stores/default.py
@@ -15,6 +15,8 @@ from upath import UPath
  from rslearn.const import WGS84_PROJECTION
  from rslearn.utils.feature import Feature
  from rslearn.utils.fsspec import (
+     iter_nonhidden_files,
+     iter_nonhidden_subdirs,
      join_upath,
      open_atomic,
      open_rasterio_upath_reader,
@@ -129,7 +131,7 @@ class DefaultTileStore(TileStore):
              ValueError: if no file is found.
          """
          raster_dir = self._get_raster_dir(layer_name, item_name, bands)
-         for fname in raster_dir.iterdir():
+         for fname in iter_nonhidden_files(raster_dir):
              # Ignore completed sentinel files, bands files, as well as temporary files created by
              # open_atomic (in case this tile store is on local filesystem).
              if fname.name == COMPLETED_FNAME:
@@ -175,7 +177,7 @@ class DefaultTileStore(TileStore):
              return []
  
          bands: list[list[str]] = []
-         for raster_dir in item_dir.iterdir():
+         for raster_dir in iter_nonhidden_subdirs(item_dir):
              if not (raster_dir / BANDS_FNAME).exists():
                  # This is likely a legacy directory where the bands are only encoded in
                  # the directory name, so we have to rely on that.