rslearn 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry; it is provided for informational purposes only.
rslearn/data_sources/planetary_computer.py CHANGED
@@ -3,7 +3,7 @@
  import os
  import tempfile
  import xml.etree.ElementTree as ET
- from datetime import timedelta
+ from datetime import datetime, timedelta
  from typing import Any

  import affine
@@ -12,6 +12,7 @@ import planetary_computer
  import rasterio
  import requests
  from rasterio.enums import Resampling
+ from typing_extensions import override
  from upath import UPath

  from rslearn.config import LayerConfig
@@ -24,11 +25,104 @@ from rslearn.tile_stores import TileStore, TileStoreWithLayer
  from rslearn.utils.fsspec import join_upath
  from rslearn.utils.geometry import PixelBounds, Projection, STGeometry
  from rslearn.utils.raster_format import get_raster_projection_and_bounds
+ from rslearn.utils.stac import StacClient, StacItem

  from .copernicus import get_harmonize_callback

  logger = get_logger(__name__)

+ # Max limit accepted by Planetary Computer API.
+ PLANETARY_COMPUTER_LIMIT = 1000
+
+
+ class PlanetaryComputerStacClient(StacClient):
+     """A StacClient subclass that handles Planetary Computer's pagination limits.
+
+     Planetary Computer STAC API does not support standard pagination and has a max
+     limit of 1000. If the initial query returns 1000 items, this client paginates
+     by sorting by ID and using gt (greater than) queries to fetch subsequent pages.
+     """
+
+     @override
+     def search(
+         self,
+         collections: list[str] | None = None,
+         bbox: tuple[float, float, float, float] | None = None,
+         intersects: dict[str, Any] | None = None,
+         date_time: datetime | tuple[datetime, datetime] | None = None,
+         ids: list[str] | None = None,
+         limit: int | None = None,
+         query: dict[str, Any] | None = None,
+         sortby: list[dict[str, str]] | None = None,
+     ) -> list[StacItem]:
+         # We will use sortby for pagination, so the caller must not set it.
+         if sortby is not None:
+             raise ValueError("sortby must not be set for PlanetaryComputerStacClient")
+
+         # First, try a simple query with the PC limit to detect if pagination is needed.
+         # We always use PLANETARY_COMPUTER_LIMIT for the request because PC doesn't
+         # support standard pagination, and we need to detect when we hit the limit
+         # to switch to ID-based pagination.
+         # We could just start sorting by ID here and do pagination, but we treat it as
+         # a special case to avoid sorting since that seems to speed up the query.
+         stac_items = super().search(
+             collections=collections,
+             bbox=bbox,
+             intersects=intersects,
+             date_time=date_time,
+             ids=ids,
+             limit=PLANETARY_COMPUTER_LIMIT,
+             query=query,
+         )
+
+         # If we got fewer than the PC limit, we have all the results.
+         if len(stac_items) < PLANETARY_COMPUTER_LIMIT:
+             return stac_items
+
+         # We hit the limit, so we need to paginate by ID.
+         # Re-fetch with sorting by ID to ensure consistent ordering for pagination.
+         logger.debug(
+             "Initial request returned %d items (at limit), switching to ID pagination",
+             len(stac_items),
+         )
+
+         all_items: list[StacItem] = []
+         last_id: str | None = None
+
+         while True:
+             # Build query with id > last_id if we're paginating.
+             combined_query: dict[str, Any] = dict(query) if query else {}
+             if last_id is not None:
+                 combined_query["id"] = {"gt": last_id}
+
+             stac_items = super().search(
+                 collections=collections,
+                 bbox=bbox,
+                 intersects=intersects,
+                 date_time=date_time,
+                 ids=ids,
+                 limit=PLANETARY_COMPUTER_LIMIT,
+                 query=combined_query if combined_query else None,
+                 sortby=[{"field": "id", "direction": "asc"}],
+             )
+
+             all_items.extend(stac_items)
+
+             # If we got fewer than the limit, we've fetched everything.
+             if len(stac_items) < PLANETARY_COMPUTER_LIMIT:
+                 break
+
+             # Otherwise, paginate using the last item's ID.
+             last_id = stac_items[-1].id
+             logger.debug(
+                 "Got %d items, paginating with id > %s",
+                 len(stac_items),
+                 last_id,
+             )
+
+         logger.debug("Total items fetched: %d", len(all_items))
+         return all_items
+

  class PlanetaryComputer(StacDataSource, TileStore):
      """Modality-agnostic data source for data on Microsoft Planetary Computer.
@@ -100,6 +194,10 @@ class PlanetaryComputer(StacDataSource, TileStore):
              required_assets=required_assets,
              cache_dir=cache_upath,
          )
+
+         # Replace the client with PlanetaryComputerStacClient to handle PC's pagination limits.
+         self.client = PlanetaryComputerStacClient(self.STAC_ENDPOINT)
+
          self.asset_bands = asset_bands
          self.timeout = timeout
          self.skip_items_missing_assets = skip_items_missing_assets
rslearn/data_sources/stac.py CHANGED
@@ -12,6 +12,7 @@ from rslearn.const import WGS84_PROJECTION
  from rslearn.data_sources.data_source import Item, ItemLookupDataSource
  from rslearn.data_sources.utils import match_candidate_items_to_window
  from rslearn.log_utils import get_logger
+ from rslearn.utils.fsspec import open_atomic
  from rslearn.utils.geometry import STGeometry
  from rslearn.utils.stac import StacClient, StacItem

@@ -187,7 +188,7 @@ class StacDataSource(ItemLookupDataSource[SourceItem]):

          # Finally we cache it if cache_dir is set.
          if cache_fname is not None:
-             with cache_fname.open("w") as f:
+             with open_atomic(cache_fname, "w") as f:
                  json.dump(item.serialize(), f)

          return item
@@ -259,7 +260,7 @@ class StacDataSource(ItemLookupDataSource[SourceItem]):
              cache_fname = self.cache_dir / f"{item.name}.json"
              if cache_fname.exists():
                  continue
-             with cache_fname.open("w") as f:
+             with open_atomic(cache_fname, "w") as f:
                  json.dump(item.serialize(), f)

          cur_groups = match_candidate_items_to_window(
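
Both caching paths now go through open_atomic from rslearn.utils.fsspec, so an interrupted write cannot leave a truncated JSON file for a later read to choke on. For context, here is a sketch of the usual write-to-temp-then-rename pattern behind such a helper (an assumed illustration of the behavior, not rslearn's actual implementation):

```python
# Sketch of the temp-file-then-rename pattern behind atomic writes; this is an
# assumed illustration of what an open_atomic helper provides, not rslearn's code.
import contextlib
import os
import tempfile
from collections.abc import Iterator
from typing import IO


@contextlib.contextmanager
def open_atomic_sketch(path: str, mode: str = "w") -> Iterator[IO]:
    # Write to a temporary file in the same directory so the final rename is atomic.
    dirname = os.path.dirname(path) or "."
    fd, tmp_path = tempfile.mkstemp(dir=dirname)
    try:
        with os.fdopen(fd, mode) as f:
            yield f
        os.replace(tmp_path, path)  # atomic on POSIX; readers never see a partial file
    except BaseException:
        os.unlink(tmp_path)
        raise
```
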
rslearn/models/simple_time_series.py CHANGED
@@ -180,7 +180,7 @@ class SimpleTimeSeries(FeatureExtractor):
              # want to pass 2 timesteps to the model.
              # TODO is probably to make this behaviour clearer but lets leave it like
              # this for now to not break things.
-             num_timesteps = images.shape[1] // image_channels
+             num_timesteps = image_channels // images.shape[1]
              batched_timesteps = images.shape[2] // num_timesteps
              images = rearrange(
                  images,
rslearn/train/tasks/segmentation.py CHANGED
@@ -59,6 +59,8 @@ class SegmentationTask(BasicTask):
          miou_metric_kwargs: dict[str, Any] = {},
          prob_scales: list[float] | None = None,
          other_metrics: dict[str, Metric] = {},
+         output_probs: bool = False,
+         output_class_idx: int | None = None,
          **kwargs: Any,
      ) -> None:
          """Initialize a new SegmentationTask.
@@ -92,6 +94,10 @@ class SegmentationTask(BasicTask):
                  this is only applied during prediction, not when computing val or test
                  metrics.
              other_metrics: additional metrics to configure on this task.
+             output_probs: if True, output raw softmax probabilities instead of class IDs
+                 during prediction.
+             output_class_idx: if set along with output_probs, only output the probability
+                 for this specific class index (single-channel output).
              kwargs: additional arguments to pass to BasicTask
          """
          super().__init__(**kwargs)
@@ -116,6 +122,8 @@ class SegmentationTask(BasicTask):
          self.miou_metric_kwargs = miou_metric_kwargs
          self.prob_scales = prob_scales
          self.other_metrics = other_metrics
+         self.output_probs = output_probs
+         self.output_class_idx = output_class_idx

      def process_inputs(
          self,
@@ -171,7 +179,9 @@ class SegmentationTask(BasicTask):
              metadata: metadata about the patch being read

          Returns:
-             CHW numpy array with one channel, containing the predicted class IDs.
+             CHW numpy array. If output_probs is False, returns one channel with
+                 predicted class IDs. If output_probs is True, returns softmax probabilities
+                 (num_classes channels, or 1 channel if output_class_idx is set).
          """
          if not isinstance(raw_output, torch.Tensor) or len(raw_output.shape) != 3:
              raise ValueError("the output for SegmentationTask must be a CHW tensor")
@@ -183,6 +193,15 @@ class SegmentationTask(BasicTask):
                  self.prob_scales, device=raw_output.device, dtype=raw_output.dtype
              )[:, None, None]
          )
+
+         if self.output_probs:
+             # Return raw softmax probabilities
+             probs = raw_output.cpu().numpy()
+             if self.output_class_idx is not None:
+                 # Return only the specified class probability
+                 return probs[self.output_class_idx : self.output_class_idx + 1, :, :]
+             return probs
+
          classes = raw_output.argmax(dim=0).cpu().numpy()
          return classes[None, :, :]

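
The added options only change what the prediction step emits for each patch. A small sketch of the three output shapes on a dummy CHW probability tensor (values are random; shapes match the docstring above):

```python
# Illustrates the three prediction outputs of SegmentationTask on a dummy
# CHW probability tensor (3 classes, 4x4 pixels); shapes match the docstring above.
import torch

raw_output = torch.softmax(torch.randn(3, 4, 4), dim=0)  # CHW probabilities

# Default (output_probs=False): one channel of class IDs.
class_ids = raw_output.argmax(dim=0).cpu().numpy()[None, :, :]
print(class_ids.shape)  # (1, 4, 4)

# output_probs=True: all per-class probabilities.
probs = raw_output.cpu().numpy()
print(probs.shape)  # (3, 4, 4)

# output_probs=True with output_class_idx=1: a single class's probability channel.
single = probs[1:2, :, :]
print(single.shape)  # (1, 4, 4)
```
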
rslearn/utils/raster_format.py CHANGED
@@ -476,6 +476,7 @@ class GeotiffRasterFormat(RasterFormat):
          bounds: PixelBounds,
          array: npt.NDArray[Any],
          fname: str | None = None,
+         nodata_val: int | float | None = None,
      ) -> None:
          """Encodes raster data.

@@ -485,6 +486,7 @@ class GeotiffRasterFormat(RasterFormat):
              bounds: the bounds of the raster data in the projection
              array: the raster data
              fname: override the filename to save as
+             nodata_val: set the nodata value when writing the raster.
          """
          if fname is None:
              fname = self.fname
@@ -520,6 +522,9 @@ class GeotiffRasterFormat(RasterFormat):
              profile["tiled"] = True
              profile["blockxsize"] = self.block_size
              profile["blockysize"] = self.block_size
+         # Set nodata_val if provided.
+         if nodata_val is not None:
+             profile["nodata"] = nodata_val

          profile.update(self.geotiff_options)

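
On the write side, nodata_val is passed straight into the rasterio profile. A minimal sketch of the equivalent plain-rasterio write, outside of rslearn (array, transform, CRS, and filename are made up for illustration):

```python
# Writes a single-band GeoTIFF with an explicit nodata value via the rasterio
# profile, mirroring the nodata_val handling added to the encode path above.
# All values here (array, transform, CRS, filename) are illustrative.
import numpy as np
import rasterio
from rasterio.transform import from_origin

array = np.zeros((1, 256, 256), dtype=np.uint8)
profile = {
    "driver": "GTiff",
    "height": array.shape[1],
    "width": array.shape[2],
    "count": array.shape[0],
    "dtype": array.dtype.name,
    "crs": "EPSG:32610",
    "transform": from_origin(500000, 4000000, 10, 10),
    "nodata": 255,  # equivalent of passing nodata_val=255
}
with rasterio.open("example.tif", "w", **profile) as dst:
    dst.write(array)
```
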
@@ -535,6 +540,7 @@ class GeotiffRasterFormat(RasterFormat):
          bounds: PixelBounds,
          resampling: Resampling = Resampling.bilinear,
          fname: str | None = None,
+         nodata_val: int | float | None = None,
      ) -> npt.NDArray[Any]:
          """Decodes raster data.

@@ -544,6 +550,16 @@ class GeotiffRasterFormat(RasterFormat):
              bounds: the bounds to read in the given projection.
              resampling: resampling method to use in case resampling is needed.
              fname: override the filename to read from
+             nodata_val: override the nodata value in the raster when reading. Pixels in
+                 bounds that are not present in the source raster will be initialized to
+                 this value. Note that, if the raster specifies a nodata value, and
+                 some source pixels have that value, they will still be read under their
+                 original value; overriding the nodata value is primarily useful if the
+                 user wants out of bounds pixels to have a different value from the
+                 source pixels, e.g. if the source data has background and foreground
+                 classes (with background being nodata) but we want to read it in a
+                 different projection and have out of bounds pixels be a third "invalid"
+                 value.

          Returns:
              the raster data
@@ -561,6 +577,7 @@ class GeotiffRasterFormat(RasterFormat):
              width=bounds[2] - bounds[0],
              height=bounds[3] - bounds[1],
              resampling=resampling,
+             src_nodata=nodata_val,
          ) as vrt:
              return vrt.read()

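
On the read side, the override is forwarded to the WarpedVRT as src_nodata, so pixels of the requested bounds that fall outside the source raster come back filled with that value. A sketch of the same effect in plain rasterio (the filename and the 128-pixel offset are illustrative):

```python
# Reads a window larger than the source raster through a WarpedVRT so that
# out-of-bounds pixels are filled with the overridden nodata value, as described
# in the decode docstring above. Filename and geotransform are illustrative.
import rasterio
from rasterio.enums import Resampling
from rasterio.transform import from_origin
from rasterio.vrt import WarpedVRT

with rasterio.open("example.tif") as src:
    with WarpedVRT(
        src,
        crs=src.crs,
        # Shift the window 128 pixels up and left so part of it falls outside the source.
        transform=from_origin(
            src.bounds.left - 128 * src.res[0],
            src.bounds.top + 128 * src.res[1],
            src.res[0],
            src.res[1],
        ),
        width=src.width + 128,
        height=src.height + 128,
        resampling=Resampling.bilinear,
        src_nodata=255,  # out-of-bounds pixels come back as 255
    ) as vrt:
        data = vrt.read()
```
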
rslearn/utils/stac.py CHANGED
@@ -101,6 +101,7 @@ class StacClient:
          ids: list[str] | None = None,
          limit: int | None = None,
          query: dict[str, Any] | None = None,
+         sortby: list[dict[str, str]] | None = None,
      ) -> list[StacItem]:
          """Execute a STAC item search.

@@ -117,6 +118,7 @@ class StacClient:
              limit: number of items per page. We will read all the pages.
              query: query dict, if STAC query extension is supported by this API. See
                  https://github.com/stac-api-extensions/query.
+             sortby: list of sort specifications, e.g. [{"field": "id", "direction": "asc"}].

          Returns:
              list of matching STAC items.
@@ -142,6 +144,8 @@ class StacClient:
              request_data["limit"] = limit
          if query is not None:
              request_data["query"] = query
+         if sortby is not None:
+             request_data["sortby"] = sortby

          # Handle pagination.
          cur_url = self.endpoint + "/search"
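
A quick usage example of the new parameter, assuming the endpoint-only constructor shown elsewhere in this diff and a target STAC API that implements the sort extension (the collection and bbox are illustrative):

```python
# Example of the new sortby parameter on StacClient.search; endpoint, collection,
# and bbox are illustrative, and the server must support the STAC sort extension
# for the requested ordering to be honored.
from rslearn.utils.stac import StacClient

client = StacClient("https://planetarycomputer.microsoft.com/api/stac/v1")
items = client.search(
    collections=["sentinel-2-l2a"],
    bbox=(-122.6, 47.5, -122.2, 47.8),
    limit=100,
    sortby=[{"field": "id", "direction": "asc"}],
)
```
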
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rslearn
- Version: 0.0.23
+ Version: 0.0.24
  Summary: A library for developing remote sensing datasets and models
  Author: OlmoEarth Team
  License: Apache License
@@ -25,9 +25,9 @@ rslearn/data_sources/local_files.py,sha256=mo5W_BxBl89EPTIHNDEpXM6qBjrP225KK0Pcm
  rslearn/data_sources/openstreetmap.py,sha256=TzZfouc2Z4_xjx2v_uv7aPn4tVW3flRVQN4qBfl507E,18161
  rslearn/data_sources/planet.py,sha256=6FWQ0bl1k3jwvwp4EVGi2qs3OD1QhnKOKP36mN4HELI,9446
  rslearn/data_sources/planet_basemap.py,sha256=e9R6FlagJjg8Z6Rc1dC6zK3xMkCohz8eohXqXmd29xg,9670
- rslearn/data_sources/planetary_computer.py,sha256=8kVatSXnwPUZljVOjj9vnVbOsmWhRdROi5YTiCmYmII,22594
+ rslearn/data_sources/planetary_computer.py,sha256=k6CO5Yim2I-frlD8r2_uBo0CQFw89mN3_5mrv0Xk2WU,26449
  rslearn/data_sources/soilgrids.py,sha256=rwO4goFPQ7lx420FvYBHYFXdihnZqn_-IjdqtxQ9j2g,12455
- rslearn/data_sources/stac.py,sha256=l7V1QzvpNtoH_funiTSl1J8Lj1P3nMj24_fRpgCAslQ,10692
+ rslearn/data_sources/stac.py,sha256=Xty1JDueAAonNVLRo8vfNBhlHrVLjhmZ-uRBYbrGvtA,10753
  rslearn/data_sources/usda_cdl.py,sha256=_WvxZkm0fbXfniRs6NT8iVCbTTmVPflDhsFT2ci6_Dk,6879
  rslearn/data_sources/usgs_landsat.py,sha256=kPOb3hsZe5-guUcFZZkwzcRpYZ3Zo7Bk4E829q_xiyU,18516
  rslearn/data_sources/utils.py,sha256=v_90ALOuts7RHNcx-j8o-aQ_aFjh8ZhXrmsaa9uEGDA,11651
@@ -69,7 +69,7 @@ rslearn/models/prithvi.py,sha256=J45eC1pd4l5AGlr19Qjrjrw5PPwvYE9bNM5qCFoznmg,403
  rslearn/models/resize_features.py,sha256=U7ZIVwwToJJnwchFG59wLWWP9eikHDB_1c4OtpubxHU,1693
  rslearn/models/sam2_enc.py,sha256=WZOtlp0FjaVztW4gEVIcsFQdKArS9iblRODP0b6Oc8M,3641
  rslearn/models/satlaspretrain.py,sha256=2R48ulbtd44Qy2FYJCkllE2Wk35eZxkc79ruSgkmgcQ,3384
- rslearn/models/simple_time_series.py,sha256=Nfk5E3d9W-4AyLQiy-P8p-JvxmFYE3FBrvOgttjXSMw,14678
+ rslearn/models/simple_time_series.py,sha256=farQwt_nJVyAbgaM2UzdyqpDuIO0SLmHr9e9EVPSWCE,14678
  rslearn/models/singletask.py,sha256=9DM9a9-Mv3vVQqRhPOIXG2HHuVqVa_zuvgafeeYh4r0,1903
  rslearn/models/ssl4eo_s12.py,sha256=DOlpIj6NfjIlWyJ27m9Xo8TMlovBDstFq0ARnmAJ6qY,3919
  rslearn/models/swin.py,sha256=Xqr3SswbHP6IhwT2atZMAPF2TUzQqfxvihksb8WSeRo,6065
@@ -133,7 +133,7 @@ rslearn/train/tasks/embedding.py,sha256=NdJEAaDWlWYzvOBVf7eIHfFOzqTgavfFH1J1gMbA
  rslearn/train/tasks/multi_task.py,sha256=32hvwyVsHqt7N_M3zXsTErK1K7-0-BPHzt7iGNehyaI,6314
  rslearn/train/tasks/per_pixel_regression.py,sha256=Clrod6LQGjgNC0IAR4HLY7eCGWMHj2mk4d4moZCl4Qc,10209
  rslearn/train/tasks/regression.py,sha256=bVS_ApZSpbL0NaaM8Mu5Bsu4SBUyLpVtrPslulvvZHs,12695
- rslearn/train/tasks/segmentation.py,sha256=Y3Sm2oOzR3yJCpagwBmp1yCwa024MQN2v1PcpiaWBf8,28425
+ rslearn/train/tasks/segmentation.py,sha256=LZeuveHhMQsjNOQfMcwqSI4Ux3k9zfa58A2eZHSif8Y,29391
  rslearn/train/tasks/task.py,sha256=nMPunl9OlnOimr48saeTnwKMQ7Du4syGrwNKVQq4FL4,4110
  rslearn/train/transforms/__init__.py,sha256=BkCAzm4f-8TEhPIuyvCj7eJGh36aMkZFYlq-H_jkSvY,778
  rslearn/train/transforms/concatenate.py,sha256=hVVBaxIdk1Cx8JHPirj54TGpbWAJx5y_xD7k1rmGmT0,3166
@@ -155,17 +155,17 @@ rslearn/utils/get_utm_ups_crs.py,sha256=kUrcyjCK7KWvuP1XR-nURPeRqYeRO-3L8QUJ1QTF
  rslearn/utils/grid_index.py,sha256=hRmrtgpqN1pLa-djnZtgSXqKJlbgGyttGnCEmPLD0zo,2347
  rslearn/utils/jsonargparse.py,sha256=TRyZA151KzhjJlZczIHYguEA-YxCDYaZ2IwCRgx76nM,4791
  rslearn/utils/mp.py,sha256=XYmVckI5TOQuCKc49NJyirDJyFgvb4AI-gGypG2j680,1399
- rslearn/utils/raster_format.py,sha256=qZpbODF4I7BsOxf43O6vTmH2TSNw6N8PP0wsFUVvdIw,26267
+ rslearn/utils/raster_format.py,sha256=fwotJBadwqYSdK8UokiKOk1pOF8JMim3kP_VwLWivPI,27382
  rslearn/utils/rtree_index.py,sha256=j0Zwrq3pXuAJ-hKpiRFQ7VNtvO3fZYk-Em2uBPAqfx4,6460
  rslearn/utils/spatial_index.py,sha256=eomJAUgzmjir8j9HZnSgQoJHwN9H0wGTjmJkMkLLfsU,762
  rslearn/utils/sqlite_index.py,sha256=YGOJi66544e6JNtfSft6YIlHklFdSJO2duxQ4TJ2iu4,2920
- rslearn/utils/stac.py,sha256=z93N5ZeEe1oUikX5ILMA5sQEZX276sAeMjsg0TShnSk,5776
+ rslearn/utils/stac.py,sha256=c8NwOCKWvUwA-FSKlxZn-t7RZYweuye53OufT0bAK4A,5996
  rslearn/utils/time.py,sha256=2ilSLG94_sxLP3y5RSV5L5CG8CoND_dbdzYEHVtN-I8,387
  rslearn/utils/vector_format.py,sha256=4ZDYpfBLLxguJkiIaavTagiQK2Sv4Rz9NumbHlq-3Lw,15041
- rslearn-0.0.23.dist-info/licenses/LICENSE,sha256=_99ZWPoLdlUbqZoSC5DF4ihiNwl5rTEmBaq2fACecdg,11352
- rslearn-0.0.23.dist-info/licenses/NOTICE,sha256=wLPr6rwV_jCg-xEknNGwhnkfRfuoOE9MZ-lru2yZyLI,5070
- rslearn-0.0.23.dist-info/METADATA,sha256=YFo7HcByJFrlgbSqcCUat2Z7nn1RU0aQzR0InaDSKEg,37936
- rslearn-0.0.23.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
- rslearn-0.0.23.dist-info/entry_points.txt,sha256=doTBQ57NT7nq-dgYGgTTw6mafcGWb_4PWYtYR4rGm50,46
- rslearn-0.0.23.dist-info/top_level.txt,sha256=XDKo90WBH8P9RQumHxo0giLJsoufT4r9odv-WE6Ahk4,8
- rslearn-0.0.23.dist-info/RECORD,,
+ rslearn-0.0.24.dist-info/licenses/LICENSE,sha256=_99ZWPoLdlUbqZoSC5DF4ihiNwl5rTEmBaq2fACecdg,11352
+ rslearn-0.0.24.dist-info/licenses/NOTICE,sha256=wLPr6rwV_jCg-xEknNGwhnkfRfuoOE9MZ-lru2yZyLI,5070
+ rslearn-0.0.24.dist-info/METADATA,sha256=gV5mgeYPYiKWrEu7D8acOubWvg76Nn_4ICvlD7iTpcs,37936
+ rslearn-0.0.24.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+ rslearn-0.0.24.dist-info/entry_points.txt,sha256=doTBQ57NT7nq-dgYGgTTw6mafcGWb_4PWYtYR4rGm50,46
+ rslearn-0.0.24.dist-info/top_level.txt,sha256=XDKo90WBH8P9RQumHxo0giLJsoufT4r9odv-WE6Ahk4,8
+ rslearn-0.0.24.dist-info/RECORD,,