rslearn 0.0.19__tar.gz → 0.0.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. {rslearn-0.0.19/rslearn.egg-info → rslearn-0.0.21}/PKG-INFO +1 -1
  2. {rslearn-0.0.19 → rslearn-0.0.21}/pyproject.toml +1 -1
  3. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/climate_data_store.py +216 -29
  4. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/anysat.py +35 -33
  5. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/clip.py +5 -2
  6. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/croma.py +11 -3
  7. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/dinov3.py +2 -1
  8. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/faster_rcnn.py +2 -1
  9. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/galileo/galileo.py +58 -31
  10. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/module_wrapper.py +6 -1
  11. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/molmo.py +4 -2
  12. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/olmoearth_pretrain/model.py +95 -32
  13. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/olmoearth_pretrain/norm.py +5 -3
  14. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon.py +3 -1
  15. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/presto/presto.py +45 -15
  16. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/prithvi.py +9 -7
  17. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/sam2_enc.py +3 -1
  18. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/satlaspretrain.py +4 -1
  19. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/simple_time_series.py +36 -16
  20. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/ssl4eo_s12.py +19 -14
  21. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/swin.py +3 -1
  22. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/terramind.py +5 -4
  23. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/all_patches_dataset.py +34 -14
  24. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/dataset.py +73 -8
  25. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/model_context.py +35 -1
  26. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/classification.py +8 -2
  27. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/detection.py +3 -2
  28. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/multi_task.py +2 -3
  29. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/per_pixel_regression.py +14 -5
  30. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/regression.py +8 -2
  31. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/segmentation.py +13 -4
  32. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/task.py +2 -2
  33. rslearn-0.0.21/rslearn/train/transforms/concatenate.py +89 -0
  34. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/crop.py +22 -8
  35. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/flip.py +13 -5
  36. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/mask.py +11 -2
  37. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/normalize.py +46 -15
  38. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/pad.py +15 -3
  39. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/resize.py +18 -9
  40. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/select_bands.py +11 -2
  41. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/sentinel1.py +18 -3
  42. {rslearn-0.0.19 → rslearn-0.0.21/rslearn.egg-info}/PKG-INFO +1 -1
  43. rslearn-0.0.19/rslearn/train/transforms/concatenate.py +0 -49
  44. {rslearn-0.0.19 → rslearn-0.0.21}/LICENSE +0 -0
  45. {rslearn-0.0.19 → rslearn-0.0.21}/NOTICE +0 -0
  46. {rslearn-0.0.19 → rslearn-0.0.21}/README.md +0 -0
  47. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/__init__.py +0 -0
  48. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/arg_parser.py +0 -0
  49. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/config/__init__.py +0 -0
  50. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/config/dataset.py +0 -0
  51. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/const.py +0 -0
  52. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/__init__.py +0 -0
  53. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/aws_landsat.py +0 -0
  54. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/aws_open_data.py +0 -0
  55. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/aws_sentinel1.py +0 -0
  56. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/copernicus.py +0 -0
  57. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/data_source.py +0 -0
  58. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/earthdaily.py +0 -0
  59. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/earthdata_srtm.py +0 -0
  60. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/eurocrops.py +0 -0
  61. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/gcp_public_data.py +0 -0
  62. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/google_earth_engine.py +0 -0
  63. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/local_files.py +0 -0
  64. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/openstreetmap.py +0 -0
  65. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/planet.py +0 -0
  66. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/planet_basemap.py +0 -0
  67. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/planetary_computer.py +0 -0
  68. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/usda_cdl.py +0 -0
  69. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/usgs_landsat.py +0 -0
  70. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/utils.py +0 -0
  71. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/vector_source.py +0 -0
  72. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/worldcereal.py +0 -0
  73. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/worldcover.py +0 -0
  74. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/worldpop.py +0 -0
  75. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/data_sources/xyz_tiles.py +0 -0
  76. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/__init__.py +0 -0
  77. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/add_windows.py +0 -0
  78. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/dataset.py +0 -0
  79. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/handler_summaries.py +0 -0
  80. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/manage.py +0 -0
  81. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/materialize.py +0 -0
  82. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/remap.py +0 -0
  83. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/storage/__init__.py +0 -0
  84. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/storage/file.py +0 -0
  85. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/storage/storage.py +0 -0
  86. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/dataset/window.py +0 -0
  87. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/lightning_cli.py +0 -0
  88. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/log_utils.py +0 -0
  89. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/main.py +0 -0
  90. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/__init__.py +0 -0
  91. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/attention_pooling.py +0 -0
  92. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/clay/clay.py +0 -0
  93. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/clay/configs/metadata.yaml +0 -0
  94. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/component.py +0 -0
  95. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/concatenate_features.py +0 -0
  96. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/conv.py +0 -0
  97. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/__init__.py +0 -0
  98. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/box_ops.py +0 -0
  99. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/detr.py +0 -0
  100. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/matcher.py +0 -0
  101. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/position_encoding.py +0 -0
  102. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/transformer.py +0 -0
  103. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/detr/util.py +0 -0
  104. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/feature_center_crop.py +0 -0
  105. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/fpn.py +0 -0
  106. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/galileo/__init__.py +0 -0
  107. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/galileo/single_file_galileo.py +0 -0
  108. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/multitask.py +0 -0
  109. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/olmoearth_pretrain/__init__.py +0 -0
  110. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/drone.yaml +0 -0
  111. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/enmap.yaml +0 -0
  112. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/goes.yaml +0 -0
  113. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/himawari.yaml +0 -0
  114. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/intuition.yaml +0 -0
  115. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/landsat8.yaml +0 -0
  116. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/modis_terra.yaml +0 -0
  117. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +0 -0
  118. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/sentinel1.yaml +0 -0
  119. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/sentinel2.yaml +0 -0
  120. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/superdove.yaml +0 -0
  121. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/panopticon_data/sensors/wv23.yaml +0 -0
  122. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/pick_features.py +0 -0
  123. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/pooling_decoder.py +0 -0
  124. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/presto/__init__.py +0 -0
  125. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/presto/single_file_presto.py +0 -0
  126. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/resize_features.py +0 -0
  127. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/singletask.py +0 -0
  128. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/task_embedding.py +0 -0
  129. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/trunk.py +0 -0
  130. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/unet.py +0 -0
  131. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/upsample.py +0 -0
  132. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/models/use_croma.py +0 -0
  133. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/py.typed +0 -0
  134. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/template_params.py +0 -0
  135. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/tile_stores/__init__.py +0 -0
  136. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/tile_stores/default.py +0 -0
  137. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/tile_stores/tile_store.py +0 -0
  138. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/__init__.py +0 -0
  139. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/callbacks/__init__.py +0 -0
  140. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/callbacks/adapters.py +0 -0
  141. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/callbacks/freeze_unfreeze.py +0 -0
  142. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/callbacks/gradients.py +0 -0
  143. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/callbacks/peft.py +0 -0
  144. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/data_module.py +0 -0
  145. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/lightning_module.py +0 -0
  146. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/optimizer.py +0 -0
  147. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/prediction_writer.py +0 -0
  148. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/scheduler.py +0 -0
  149. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/__init__.py +0 -0
  150. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/tasks/embedding.py +0 -0
  151. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/__init__.py +0 -0
  152. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/train/transforms/transform.py +0 -0
  153. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/__init__.py +0 -0
  154. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/array.py +0 -0
  155. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/feature.py +0 -0
  156. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/fsspec.py +0 -0
  157. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/geometry.py +0 -0
  158. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/get_utm_ups_crs.py +0 -0
  159. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/grid_index.py +0 -0
  160. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/jsonargparse.py +0 -0
  161. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/mp.py +0 -0
  162. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/raster_format.py +0 -0
  163. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/rtree_index.py +0 -0
  164. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/spatial_index.py +0 -0
  165. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/sqlite_index.py +0 -0
  166. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/time.py +0 -0
  167. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn/utils/vector_format.py +0 -0
  168. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn.egg-info/SOURCES.txt +0 -0
  169. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn.egg-info/dependency_links.txt +0 -0
  170. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn.egg-info/entry_points.txt +0 -0
  171. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn.egg-info/requires.txt +0 -0
  172. {rslearn-0.0.19 → rslearn-0.0.21}/rslearn.egg-info/top_level.txt +0 -0
  173. {rslearn-0.0.19 → rslearn-0.0.21}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rslearn
3
- Version: 0.0.19
3
+ Version: 0.0.21
4
4
  Summary: A library for developing remote sensing datasets and models
5
5
  Author: OlmoEarth Team
6
6
  License: Apache License
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rslearn"
3
- version = "0.0.19"
3
+ version = "0.0.21"
4
4
  description = "A library for developing remote sensing datasets and models"
5
5
  authors = [
6
6
  { name = "OlmoEarth Team" },
@@ -24,51 +24,55 @@ from rslearn.utils.geometry import STGeometry
24
24
  logger = get_logger(__name__)
25
25
 
26
26
 
27
- class ERA5LandMonthlyMeans(DataSource):
28
- """A data source for ingesting ERA5 land monthly averaged data from the Copernicus Climate Data Store.
27
+ class ERA5Land(DataSource):
28
+ """Base class for ingesting ERA5 land data from the Copernicus Climate Data Store.
29
29
 
30
30
  An API key must be passed either in the configuration or via the CDSAPI_KEY
31
31
  environment variable. You can acquire an API key by going to the Climate Data Store
32
32
  website (https://cds.climate.copernicus.eu/), registering an account and logging
33
- in, and then
33
+ in, and then getting the API key from the user profile page.
34
34
 
35
35
  The band names should match CDS variable names (see the reference at
36
36
  https://confluence.ecmwf.int/display/CKB/ERA5-Land%3A+data+documentation). However,
37
37
  replace "_" with "-" in the variable names when specifying bands in the layer
38
38
  configuration.
39
39
 
40
- This data source corresponds to the reanalysis-era5-land-monthly-means product.
41
-
42
- All requests to the API will be for the whole globe. Although the API supports arbitrary
43
- bounds in the requests, using the whole available area helps to reduce the total number of
44
- requests.
40
+ By default, all requests to the API will be for the whole globe. To speed up ingestion,
41
+ we recommend specifying the bounds of the area of interest, in particular for hourly data.
45
42
  """
46
43
 
47
44
  api_url = "https://cds.climate.copernicus.eu/api"
48
-
49
- # see: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land-monthly-means
50
- DATASET = "reanalysis-era5-land-monthly-means"
51
- PRODUCT_TYPE = "monthly_averaged_reanalysis"
52
45
  DATA_FORMAT = "netcdf"
53
46
  DOWNLOAD_FORMAT = "unarchived"
54
47
  PIXEL_SIZE = 0.1 # degrees, native resolution is 9km
55
48
 
56
49
  def __init__(
57
50
  self,
51
+ dataset: str,
52
+ product_type: str,
58
53
  band_names: list[str] | None = None,
59
54
  api_key: str | None = None,
55
+ bounds: list[float] | None = None,
60
56
  context: DataSourceContext = DataSourceContext(),
61
57
  ):
62
- """Initialize a new ERA5LandMonthlyMeans instance.
58
+ """Initialize a new ERA5Land instance.
63
59
 
64
60
  Args:
61
+ dataset: the CDS dataset name (e.g., "reanalysis-era5-land-monthly-means").
62
+ product_type: the CDS product type (e.g., "monthly_averaged_reanalysis").
65
63
  band_names: list of band names to acquire. These should correspond to CDS
66
64
  variable names but with "_" replaced with "-". This will only be used
67
65
  if the layer config is missing from the context.
68
66
  api_key: the API key. If not set, it should be set via the CDSAPI_KEY
69
67
  environment variable.
68
+ bounds: optional bounding box as [min_lon, min_lat, max_lon, max_lat].
69
+ If not specified, the whole globe will be used.
70
70
  context: the data source context.
71
71
  """
72
+ self.dataset = dataset
73
+ self.product_type = product_type
74
+ self.bounds = bounds
75
+
72
76
  self.band_names: list[str]
73
77
  if context.layer_config is not None:
74
78
  self.band_names = []
@@ -134,8 +138,11 @@ class ERA5LandMonthlyMeans(DataSource):
134
138
  # Collect Item list corresponding to the current month.
135
139
  items = []
136
140
  item_name = f"era5land_monthlyaveraged_{cur_date.year}_{cur_date.month}"
137
- # Space is the whole globe.
138
- bounds = (-180, -90, 180, 90)
141
+ # Use bounds if set, otherwise use whole globe
142
+ if self.bounds is not None:
143
+ bounds = self.bounds
144
+ else:
145
+ bounds = [-180, -90, 180, 90]
139
146
  # Time is just the given month.
140
147
  start_date = datetime(cur_date.year, cur_date.month, 1, tzinfo=UTC)
141
148
  time_range = (
@@ -172,7 +179,9 @@ class ERA5LandMonthlyMeans(DataSource):
172
179
  # But the list of variables should include the bands we want in the correct
173
180
  # order. And we can distinguish those bands from other "variables" because they
174
181
  # will be 3D while the others will be scalars or 1D.
175
- bands_data = []
182
+
183
+ band_arrays = []
184
+ num_time_steps = None
176
185
  for band_name in nc.variables:
177
186
  band_data = nc.variables[band_name]
178
187
  if len(band_data.shape) != 3:
@@ -182,18 +191,27 @@ class ERA5LandMonthlyMeans(DataSource):
182
191
  logger.debug(
183
192
  f"NC file {nc_path} has variable {band_name} with shape {band_data.shape}"
184
193
  )
185
- # Variable data is stored in a 3D array (1, height, width)
186
- if band_data.shape[0] != 1:
194
+ # Variable data is stored in a 3D array (time, height, width)
195
+ # For hourly data, time is number of days in the month x 24 hours
196
+ if num_time_steps is None:
197
+ num_time_steps = band_data.shape[0]
198
+ elif band_data.shape[0] != num_time_steps:
187
199
  raise ValueError(
188
- f"Bad shape for band {band_name}, expected 1 band but got {band_data.shape[0]}"
200
+ f"Variable {band_name} has {band_data.shape[0]} time steps, "
201
+ f"but expected {num_time_steps}"
189
202
  )
190
- bands_data.append(band_data[0, :, :])
203
+ # Original shape: (time, height, width)
204
+ band_array = np.array(band_data[:])
205
+ band_array = np.expand_dims(band_array, axis=1)
206
+ band_arrays.append(band_array)
191
207
 
192
- array = np.array(bands_data) # (num_bands, height, width)
193
- if array.shape[0] != len(self.band_names):
194
- raise ValueError(
195
- f"Expected to get {len(self.band_names)} bands but got {array.shape[0]}"
196
- )
208
+ # After concatenation: (time, num_variables, height, width)
209
+ stacked_array = np.concatenate(band_arrays, axis=1)
210
+
211
+ # After reshaping: (time x num_variables, height, width)
212
+ array = stacked_array.reshape(
213
+ -1, stacked_array.shape[2], stacked_array.shape[3]
214
+ )
197
215
 
198
216
  # Get metadata for the GeoTIFF
199
217
  lat = nc.variables["latitude"][:]
@@ -235,6 +253,58 @@ class ERA5LandMonthlyMeans(DataSource):
235
253
  ) as dst:
236
254
  dst.write(array)
237
255
 
256
+ def ingest(
257
+ self,
258
+ tile_store: TileStoreWithLayer,
259
+ items: list[Item],
260
+ geometries: list[list[STGeometry]],
261
+ ) -> None:
262
+ """Ingest items into the given tile store.
263
+
264
+ This method should be overridden by subclasses.
265
+
266
+ Args:
267
+ tile_store: the tile store to ingest into
268
+ items: the items to ingest
269
+ geometries: a list of geometries needed for each item
270
+ """
271
+ raise NotImplementedError("Subclasses must implement ingest method")
272
+
273
+
274
+ class ERA5LandMonthlyMeans(ERA5Land):
275
+ """A data source for ingesting ERA5 land monthly averaged data from the Copernicus Climate Data Store.
276
+
277
+ This data source corresponds to the reanalysis-era5-land-monthly-means product.
278
+ """
279
+
280
+ def __init__(
281
+ self,
282
+ band_names: list[str] | None = None,
283
+ api_key: str | None = None,
284
+ bounds: list[float] | None = None,
285
+ context: DataSourceContext = DataSourceContext(),
286
+ ):
287
+ """Initialize a new ERA5LandMonthlyMeans instance.
288
+
289
+ Args:
290
+ band_names: list of band names to acquire. These should correspond to CDS
291
+ variable names but with "_" replaced with "-". This will only be used
292
+ if the layer config is missing from the context.
293
+ api_key: the API key. If not set, it should be set via the CDSAPI_KEY
294
+ environment variable.
295
+ bounds: optional bounding box as [min_lon, min_lat, max_lon, max_lat].
296
+ If not specified, the whole globe will be used.
297
+ context: the data source context.
298
+ """
299
+ super().__init__(
300
+ dataset="reanalysis-era5-land-monthly-means",
301
+ product_type="monthly_averaged_reanalysis",
302
+ band_names=band_names,
303
+ api_key=api_key,
304
+ bounds=bounds,
305
+ context=context,
306
+ )
307
+
238
308
  def ingest(
239
309
  self,
240
310
  tile_store: TileStoreWithLayer,
@@ -256,25 +326,142 @@ class ERA5LandMonthlyMeans(DataSource):
256
326
  continue
257
327
 
258
328
  # Send the request to the CDS API
259
- # If area is not specified, the whole globe will be requested
329
+ if self.bounds is not None:
330
+ min_lon, min_lat, max_lon, max_lat = self.bounds
331
+ area = [max_lat, min_lon, min_lat, max_lon]
332
+ else:
333
+ area = [90, -180, -90, 180] # Whole globe
334
+
260
335
  request = {
261
- "product_type": [self.PRODUCT_TYPE],
336
+ "product_type": [self.product_type],
262
337
  "variable": variable_names,
263
338
  "year": [f"{item.geometry.time_range[0].year}"], # type: ignore
264
339
  "month": [
265
340
  f"{item.geometry.time_range[0].month:02d}" # type: ignore
266
341
  ],
267
342
  "time": ["00:00"],
343
+ "area": area,
344
+ "data_format": self.DATA_FORMAT,
345
+ "download_format": self.DOWNLOAD_FORMAT,
346
+ }
347
+ logger.debug(
348
+ f"CDS API request for year={request['year']} month={request['month']} area={area}"
349
+ )
350
+ with tempfile.TemporaryDirectory() as tmp_dir:
351
+ local_nc_fname = os.path.join(tmp_dir, f"{item.name}.nc")
352
+ local_tif_fname = os.path.join(tmp_dir, f"{item.name}.tif")
353
+ self.client.retrieve(self.dataset, request, local_nc_fname)
354
+ self._convert_nc_to_tif(
355
+ UPath(local_nc_fname),
356
+ UPath(local_tif_fname),
357
+ )
358
+ tile_store.write_raster_file(
359
+ item.name, self.band_names, UPath(local_tif_fname)
360
+ )
361
+
362
+
363
+ class ERA5LandHourly(ERA5Land):
364
+ """A data source for ingesting ERA5 land hourly data from the Copernicus Climate Data Store.
365
+
366
+ This data source corresponds to the reanalysis-era5-land product.
367
+ """
368
+
369
+ def __init__(
370
+ self,
371
+ band_names: list[str] | None = None,
372
+ api_key: str | None = None,
373
+ bounds: list[float] | None = None,
374
+ context: DataSourceContext = DataSourceContext(),
375
+ ):
376
+ """Initialize a new ERA5LandHourly instance.
377
+
378
+ Args:
379
+ band_names: list of band names to acquire. These should correspond to CDS
380
+ variable names but with "_" replaced with "-". This will only be used
381
+ if the layer config is missing from the context.
382
+ api_key: the API key. If not set, it should be set via the CDSAPI_KEY
383
+ environment variable.
384
+ bounds: optional bounding box as [min_lon, min_lat, max_lon, max_lat].
385
+ If not specified, the whole globe will be used.
386
+ context: the data source context.
387
+ """
388
+ super().__init__(
389
+ dataset="reanalysis-era5-land",
390
+ product_type="reanalysis",
391
+ band_names=band_names,
392
+ api_key=api_key,
393
+ bounds=bounds,
394
+ context=context,
395
+ )
396
+
397
+ def ingest(
398
+ self,
399
+ tile_store: TileStoreWithLayer,
400
+ items: list[Item],
401
+ geometries: list[list[STGeometry]],
402
+ ) -> None:
403
+ """Ingest items into the given tile store.
404
+
405
+ Args:
406
+ tile_store: the tile store to ingest into
407
+ items: the items to ingest
408
+ geometries: a list of geometries needed for each item
409
+ """
410
+ # for CDS variable names, replace "-" with "_"
411
+ variable_names = [band.replace("-", "_") for band in self.band_names]
412
+
413
+ for item in items:
414
+ if tile_store.is_raster_ready(item.name, self.band_names):
415
+ continue
416
+
417
+ # Send the request to the CDS API
418
+ # If area is not specified, the whole globe will be requested
419
+ time_range = item.geometry.time_range
420
+ if time_range is None:
421
+ raise ValueError("Item must have a time range")
422
+
423
+ # For hourly data, request all days in the month and all 24 hours
424
+ start_time = time_range[0]
425
+
426
+ # Get all days in the month
427
+ year = start_time.year
428
+ month = start_time.month
429
+ # Get the last day of the month
430
+ if month == 12:
431
+ last_day = 31
432
+ else:
433
+ next_month = datetime(year, month + 1, 1, tzinfo=UTC)
434
+ last_day = (next_month - relativedelta(days=1)).day
435
+
436
+ days = [f"{day:02d}" for day in range(1, last_day + 1)]
437
+
438
+ # Get all 24 hours
439
+ hours = [f"{hour:02d}:00" for hour in range(24)]
440
+
441
+ if self.bounds is not None:
442
+ min_lon, min_lat, max_lon, max_lat = self.bounds
443
+ area = [max_lat, min_lon, min_lat, max_lon]
444
+ else:
445
+ area = [90, -180, -90, 180] # Whole globe
446
+
447
+ request = {
448
+ "product_type": [self.product_type],
449
+ "variable": variable_names,
450
+ "year": [f"{year}"],
451
+ "month": [f"{month:02d}"],
452
+ "day": days,
453
+ "time": hours,
454
+ "area": area,
268
455
  "data_format": self.DATA_FORMAT,
269
456
  "download_format": self.DOWNLOAD_FORMAT,
270
457
  }
271
458
  logger.debug(
272
- f"CDS API request for the whole globe for year={request['year']} month={request['month']}"
459
+ f"CDS API request for year={request['year']} month={request['month']} days={len(days)} hours={len(hours)} area={area}"
273
460
  )
274
461
  with tempfile.TemporaryDirectory() as tmp_dir:
275
462
  local_nc_fname = os.path.join(tmp_dir, f"{item.name}.nc")
276
463
  local_tif_fname = os.path.join(tmp_dir, f"{item.name}.tif")
277
- self.client.retrieve(self.DATASET, request, local_nc_fname)
464
+ self.client.retrieve(self.dataset, request, local_nc_fname)
278
465
  self._convert_nc_to_tif(
279
466
  UPath(local_nc_fname),
280
467
  UPath(local_tif_fname),
@@ -4,6 +4,8 @@ This code loads the AnySat model from torch hub. See
4
4
  https://github.com/gastruc/AnySat for applicable license and copyright information.
5
5
  """
6
6
 
7
+ from datetime import datetime
8
+
7
9
  import torch
8
10
  from einops import rearrange
9
11
 
@@ -53,7 +55,6 @@ class AnySat(FeatureExtractor):
53
55
  self,
54
56
  modalities: list[str],
55
57
  patch_size_meters: int,
56
- dates: dict[str, list[int]],
57
58
  output: str = "patch",
58
59
  output_modality: str | None = None,
59
60
  hub_repo: str = "gastruc/anysat",
@@ -85,14 +86,6 @@ class AnySat(FeatureExtractor):
85
86
  if m not in MODALITY_RESOLUTIONS:
86
87
  raise ValueError(f"Invalid modality: {m}")
87
88
 
88
- if not all(m in TIME_SERIES_MODALITIES for m in dates.keys()):
89
- raise ValueError("`dates` keys must be time-series modalities only.")
90
- for m in modalities:
91
- if m in TIME_SERIES_MODALITIES and m not in dates:
92
- raise ValueError(
93
- f"Missing required dates for time-series modality '{m}'."
94
- )
95
-
96
89
  if patch_size_meters % 10 != 0:
97
90
  raise ValueError(
98
91
  "In AnySat, `patch_size` is in meters and must be a multiple of 10."
@@ -106,7 +99,6 @@ class AnySat(FeatureExtractor):
106
99
 
107
100
  self.modalities = modalities
108
101
  self.patch_size_meters = int(patch_size_meters)
109
- self.dates = dates
110
102
  self.output = output
111
103
  self.output_modality = output_modality
112
104
 
@@ -119,6 +111,20 @@ class AnySat(FeatureExtractor):
119
111
  )
120
112
  self._embed_dim = 768 # base width, 'dense' returns 2x
121
113
 
114
+ @staticmethod
115
+ def time_ranges_to_doy(
116
+ time_ranges: list[tuple[datetime, datetime]],
117
+ device: torch.device,
118
+ ) -> torch.Tensor:
119
+ """Turn the time ranges stored in a RasterImage to timestamps accepted by AnySat.
120
+
121
+ AnySat uses the doy with each timestamp, so we take the midpoint of
122
+ the time range. For some inputs (e.g. Sentinel 2) we take an image from a specific
123
+ time so that start_time == end_time == mid_time.
124
+ """
125
+ doys = [(t[0] + ((t[1] - t[0]) / 2)).timetuple().tm_yday for t in time_ranges]
126
+ return torch.tensor(doys, dtype=torch.int32, device=device)
127
+
122
128
  def forward(self, context: ModelContext) -> FeatureMaps:
123
129
  """Forward pass for the AnySat model.
124
130
 
@@ -139,17 +145,29 @@ class AnySat(FeatureExtractor):
139
145
  raise ValueError(f"Modality '{modality}' not present in inputs.")
140
146
 
141
147
  cur = torch.stack(
142
- [inp[modality] for inp in inputs], dim=0
143
- ) # (B, C, H, W) or (B, T*C, H, W)
148
+ [inp[modality].image for inp in inputs], dim=0
149
+ ) # (B, C, T, H, W)
144
150
 
145
151
  if modality in TIME_SERIES_MODALITIES:
146
- num_dates = len(self.dates[modality])
147
- num_bands = cur.shape[1] // num_dates
148
- cur = rearrange(
149
- cur, "b (t c) h w -> b t c h w", t=num_dates, c=num_bands
150
- )
152
+ num_bands = cur.shape[1]
153
+ cur = rearrange(cur, "b c t h w -> b t c h w")
151
154
  H, W = cur.shape[-2], cur.shape[-1]
155
+
156
+ if inputs[0][modality].timestamps is None:
157
+ raise ValueError(
158
+ f"Require timestamps for time series modality {modality}"
159
+ )
160
+ timestamps = torch.stack(
161
+ [
162
+ self.time_ranges_to_doy(inp[modality].timestamps, cur.device) # type: ignore
163
+ for inp in inputs
164
+ ],
165
+ dim=0,
166
+ )
167
+ batch[f"{modality}_dates"] = timestamps
152
168
  else:
169
+ # take the first (assumed only) timestep
170
+ cur = cur[:, :, 0]
153
171
  num_bands = cur.shape[1]
154
172
  H, W = cur.shape[-2], cur.shape[-1]
155
173
 
@@ -173,22 +191,6 @@ class AnySat(FeatureExtractor):
173
191
  "All modalities must share the same spatial extent (H*res, W*res)."
174
192
  )
175
193
 
176
- # Add *_dates
177
- to_add = {}
178
- for modality, x in list(batch.items()):
179
- if modality in TIME_SERIES_MODALITIES:
180
- B, T = x.shape[0], x.shape[1]
181
- d = torch.as_tensor(
182
- self.dates[modality], dtype=torch.long, device=x.device
183
- )
184
- if d.ndim != 1 or d.numel() != T:
185
- raise ValueError(
186
- f"dates for '{modality}' must be 1D length {T}, got {tuple(d.shape)}"
187
- )
188
- to_add[f"{modality}_dates"] = d.unsqueeze(0).repeat(B, 1)
189
-
190
- batch.update(to_add)
191
-
192
194
  kwargs = {"patch_size": self.patch_size_meters, "output": self.output}
193
195
  if self.output == "dense":
194
196
  kwargs["output_modality"] = self.output_modality
@@ -43,9 +43,12 @@ class CLIP(FeatureExtractor):
43
43
  a FeatureMaps with one feature map from the ViT, which is always Bx24x24x1024.
44
44
  """
45
45
  inputs = context.inputs
46
- device = inputs[0]["image"].device
46
+ device = inputs[0]["image"].image.device
47
47
  clip_inputs = self.processor(
48
- images=[inp["image"].cpu().numpy().transpose(1, 2, 0) for inp in inputs],
48
+ images=[
49
+ inp["image"].single_ts_to_chw_tensor().cpu().numpy().transpose(1, 2, 0)
50
+ for inp in inputs
51
+ ],
49
52
  return_tensors="pt",
50
53
  padding=True,
51
54
  )
@@ -175,10 +175,16 @@ class Croma(FeatureExtractor):
175
175
  sentinel1: torch.Tensor | None = None
176
176
  sentinel2: torch.Tensor | None = None
177
177
  if self.modality in [CromaModality.BOTH, CromaModality.SENTINEL1]:
178
- sentinel1 = torch.stack([inp["sentinel1"] for inp in context.inputs], dim=0)
178
+ sentinel1 = torch.stack(
179
+ [inp["sentinel1"].single_ts_to_chw_tensor() for inp in context.inputs],
180
+ dim=0,
181
+ )
179
182
  sentinel1 = self._resize_image(sentinel1) if self.do_resizing else sentinel1
180
183
  if self.modality in [CromaModality.BOTH, CromaModality.SENTINEL2]:
181
- sentinel2 = torch.stack([inp["sentinel2"] for inp in context.inputs], dim=0)
184
+ sentinel2 = torch.stack(
185
+ [inp["sentinel2"].single_ts_to_chw_tensor() for inp in context.inputs],
186
+ dim=0,
187
+ )
182
188
  sentinel2 = self._resize_image(sentinel2) if self.do_resizing else sentinel2
183
189
 
184
190
  outputs = self.model(
@@ -294,5 +300,7 @@ class CromaNormalize(Transform):
294
300
  for modality in MODALITY_BANDS.keys():
295
301
  if modality not in input_dict:
296
302
  continue
297
- input_dict[modality] = self.apply_image(input_dict[modality], modality)
303
+ input_dict[modality].image = self.apply_image(
304
+ input_dict[modality].image, modality
305
+ )
298
306
  return input_dict, target_dict
@@ -104,7 +104,8 @@ class DinoV3(FeatureExtractor):
104
104
  a FeatureMaps with one feature map.
105
105
  """
106
106
  cur = torch.stack(
107
- [inp["image"] for inp in context.inputs], dim=0
107
+ [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs],
108
+ dim=0,
108
109
  ) # (B, C, H, W)
109
110
 
110
111
  if self.do_resizing and (
@@ -210,7 +210,8 @@ class FasterRCNN(Predictor):
210
210
  ),
211
211
  )
212
212
 
213
- image_list = [inp["image"] for inp in context.inputs]
213
+ # take the first (and assumed to be only) timestep
214
+ image_list = [inp["image"].image[:, 0] for inp in context.inputs]
214
215
  images, targets = self.noop_transform(image_list, targets)
215
216
 
216
217
  feature_dict = collections.OrderedDict()