rslearn 0.0.4__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rslearn-0.0.4/rslearn.egg-info → rslearn-0.0.6}/PKG-INFO +4 -1
- {rslearn-0.0.4 → rslearn-0.0.6}/pyproject.toml +6 -2
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/arg_parser.py +1 -22
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/copernicus.py +6 -4
- rslearn-0.0.6/rslearn/data_sources/eurocrops.py +246 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/local_files.py +11 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/openstreetmap.py +2 -4
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/dataset.py +4 -1
- rslearn-0.0.6/rslearn/models/copernicusfm.py +216 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/__init__.py +1 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/aurora/area.py +50 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/aurora/fourier.py +134 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/dynamic_hypernetwork.py +523 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/flexivit/patch_embed.py +260 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/flexivit/utils.py +69 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/model_vit.py +348 -0
- rslearn-0.0.6/rslearn/models/copernicusfm_src/util/pos_embed.py +216 -0
- rslearn-0.0.6/rslearn/models/panopticon.py +167 -0
- rslearn-0.0.6/rslearn/models/presto/__init__.py +5 -0
- rslearn-0.0.6/rslearn/models/presto/presto.py +247 -0
- rslearn-0.0.6/rslearn/models/presto/single_file_presto.py +932 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/unet.py +15 -0
- rslearn-0.0.6/rslearn/template_params.py +26 -0
- {rslearn-0.0.4 → rslearn-0.0.6/rslearn.egg-info}/PKG-INFO +4 -1
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn.egg-info/SOURCES.txt +15 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/LICENSE +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/README.md +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/config/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/config/dataset.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/const.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/aws_landsat.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/aws_open_data.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/aws_sentinel1.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/climate_data_store.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/data_source.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/earthdaily.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/earthdata_srtm.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/gcp_public_data.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/geotiff.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/google_earth_engine.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/planet.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/planet_basemap.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/planetary_computer.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/raster_source.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/usda_cdl.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/usgs_landsat.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/utils.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/vector_source.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/worldcereal.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/worldcover.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/worldpop.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/data_sources/xyz_tiles.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/add_windows.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/index.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/manage.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/materialize.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/remap.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/dataset/window.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/log_utils.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/main.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/clip.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/conv.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/croma.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/box_ops.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/detr.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/matcher.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/position_encoding.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/transformer.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/detr/util.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/faster_rcnn.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/fpn.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/module_wrapper.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/molmo.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/multitask.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/pick_features.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/pooling_decoder.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/registry.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/sam2_enc.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/satlaspretrain.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/simple_time_series.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/singletask.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/ssl4eo_s12.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/swin.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/task_embedding.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/terramind.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/trunk.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/upsample.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/models/use_croma.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/py.typed +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/tile_stores/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/tile_stores/default.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/tile_stores/tile_store.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/callbacks/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/callbacks/adapters.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/callbacks/freeze_unfreeze.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/callbacks/gradients.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/callbacks/peft.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/data_module.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/dataset.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/lightning_module.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/optimizer.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/prediction_writer.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/scheduler.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/classification.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/detection.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/multi_task.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/per_pixel_regression.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/regression.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/segmentation.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/tasks/task.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/concatenate.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/crop.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/flip.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/mask.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/normalize.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/pad.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/train/transforms/transform.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/__init__.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/array.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/feature.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/fsspec.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/geometry.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/get_utm_ups_crs.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/grid_index.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/jsonargparse.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/mp.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/raster_format.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/rtree_index.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/spatial_index.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/sqlite_index.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/time.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn/utils/vector_format.py +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn.egg-info/dependency_links.txt +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn.egg-info/entry_points.txt +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn.egg-info/requires.txt +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/rslearn.egg-info/top_level.txt +0 -0
- {rslearn-0.0.4 → rslearn-0.0.6}/setup.cfg +0 -0
--- rslearn-0.0.4/PKG-INFO
+++ rslearn-0.0.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rslearn
-Version: 0.0.4
+Version: 0.0.6
 Summary: A library for developing remote sensing datasets and models
 Author: OlmoEarth Team
 License: Apache License
@@ -205,6 +205,9 @@ License: Apache License
 See the License for the specific language governing permissions and
 limitations under the License.
 
+Project-URL: homepage, https://github.com/allenai/rslearn
+Project-URL: issues, https://github.com/allenai/rslearn/issues
+Project-URL: repository, https://github.com/allenai/rslearn
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE

--- rslearn-0.0.4/pyproject.toml
+++ rslearn-0.0.6/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "rslearn"
-version = "0.0.4"
+version = "0.0.6"
 description = "A library for developing remote sensing datasets and models"
 authors = [
     { name = "OlmoEarth Team" },
@@ -8,7 +8,6 @@ authors = [
 readme = "README.md"
 license = {file = "LICENSE"}
 requires-python = ">=3.11"
-
 dependencies = [
     "boto3>=1.39",
     "class_registry>=2.1",
@@ -67,6 +66,11 @@ dev = [
     "pytest-xdist",
 ]
 
+[project.urls]
+homepage = "https://github.com/allenai/rslearn"
+issues = "https://github.com/allenai/rslearn/issues"
+repository = "https://github.com/allenai/rslearn"
+
 [build-system]
 requires = ["setuptools>=61"]
 build-backend = "setuptools.build_meta"

--- rslearn-0.0.4/rslearn/arg_parser.py
+++ rslearn-0.0.6/rslearn/arg_parser.py
@@ -1,33 +1,12 @@
 """Custom Lightning ArgumentParser with environment variable substitution support."""
 
 import os
-import re
 from typing import Any
 
 from jsonargparse import Namespace
 from lightning.pytorch.cli import LightningArgumentParser
 
-
-def substitute_env_vars_in_string(content: str) -> str:
-    """Substitute environment variables in a string.
-
-    Replaces ${VAR_NAME} patterns with os.getenv(VAR_NAME, "") values.
-    This works on raw string content before YAML parsing.
-
-    Args:
-        content: The string content containing template variables
-
-    Returns:
-        The string with environment variables substituted
-    """
-    pattern = r"\$\{([^}]+)\}"
-
-    def replace_variable(match_obj: re.Match[str]) -> str:
-        var_name = match_obj.group(1)
-        env_value = os.getenv(var_name, "")
-        return env_value if env_value is not None else ""
-
-    return re.sub(pattern, replace_variable, content)
+from rslearn.template_params import substitute_env_vars_in_string
 
 
 class RslearnArgumentParser(LightningArgumentParser):

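The helper removed here was moved into rslearn/template_params.py (new in this release, +26 lines) so the argument parser and the dataset loader can share it. A minimal sketch of the shared helper's behavior, based on the removed implementation above (the DS_ROOT variable name is only an example):

    import os
    from rslearn.template_params import substitute_env_vars_in_string

    os.environ["DS_ROOT"] = "/data/rslearn"
    # ${VAR} patterns are replaced with os.getenv(VAR, ""); unset variables
    # therefore expand to empty strings rather than raising an error.
    assert substitute_env_vars_in_string("${DS_ROOT}/windows") == "/data/rslearn/windows"
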
--- rslearn-0.0.4/rslearn/data_sources/copernicus.py
+++ rslearn-0.0.6/rslearn/data_sources/copernicus.py
@@ -319,7 +319,6 @@ class Copernicus(DataSource):
                 then we attempt to read the username/password from COPERNICUS_USERNAME
                 and COPERNICUS_PASSWORD (this is useful since access tokens are only
                 valid for an hour).
-            password: set API username/password instead of access token.
             query_filter: filter string to include when searching for items. This will
                 be appended to other name, geographic, and sensing time filters where
                 applicable. For example, "Collection/Name eq 'SENTINEL-2'". See the API
@@ -368,6 +367,7 @@ class Copernicus(DataSource):
             "order_by",
             "sort_by",
             "sort_desc",
+            "timeout",
         ]
         for k in simple_optionals:
             if k in d:
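The recurring simple_optionals pattern (here and in the Sentinel2/Sentinel1 hunks below) copies any of the listed keys present in the decoded config dict d into the constructor kwargs, so adding "timeout" to each list is all that is needed to expose it as a layer option. A standalone sketch of the pattern (the values in d are made up for illustration):

    from typing import Any

    d = {"sort_by": "ContentDate/Start", "timeout": 60}  # example decoded config
    kwargs: dict[str, Any] = {}
    simple_optionals = ["harmonize", "access_token", "order_by", "sort_by", "sort_desc", "timeout"]
    for k in simple_optionals:
        if k in d:
            kwargs[k] = d[k]
    # kwargs now holds {"sort_by": "ContentDate/Start", "timeout": 60}
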
@@ -709,6 +709,8 @@ class Sentinel2(Copernicus):
         "B12": ["B12"],
         "B8A": ["B8A"],
         "TCI": ["R", "G", "B"],
+        # L1C-only products.
+        "B10": ["B10"],
         # L2A-only products.
         "AOT": ["AOT"],
         "WVP": ["WVP"],
@@ -809,17 +811,16 @@ class Sentinel2(Copernicus):
 
         kwargs: dict[str, Any] = dict(
             assets=list(needed_assets),
+            product_type=Sentinel2ProductType[d["product_type"]],
         )
 
-        if "product_type" in d:
-            kwargs["product_type"] = Sentinel2ProductType(d["product_type"])
-
         simple_optionals = [
             "harmonize",
             "access_token",
             "order_by",
             "sort_by",
             "sort_desc",
+            "timeout",
         ]
         for k in simple_optionals:
             if k in d:
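Note the behavior change in this hunk: product_type becomes required rather than optional, and it is resolved with Sentinel2ProductType[...] (lookup by member name) instead of Sentinel2ProductType(...) (lookup by member value). A quick illustration of the Python semantics, using a hypothetical enum rather than the real Sentinel2ProductType:

    from enum import Enum

    class Example(Enum):  # hypothetical members for illustration only
        L1C = "S2MSI1C"
        L2A = "S2MSI2A"

    # Enum[name] indexes by member name; Enum(value) looks up by member value.
    assert Example["L2A"] is Example("S2MSI2A")
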
@@ -965,6 +966,7 @@ class Sentinel1(Copernicus):
             "order_by",
             "sort_by",
             "sort_desc",
+            "timeout",
         ]
         for k in simple_optionals:
             if k in d:

--- /dev/null
+++ rslearn-0.0.6/rslearn/data_sources/eurocrops.py
@@ -0,0 +1,246 @@
+"""Data source for vector EuroCrops crop type data."""
+
+import glob
+import os
+import tempfile
+import zipfile
+from datetime import UTC, datetime, timedelta
+from typing import Any
+
+import fiona
+import requests
+from rasterio.crs import CRS
+from upath import UPath
+
+from rslearn.config import QueryConfig, VectorLayerConfig
+from rslearn.const import WGS84_PROJECTION
+from rslearn.data_sources import DataSource, Item
+from rslearn.data_sources.utils import match_candidate_items_to_window
+from rslearn.log_utils import get_logger
+from rslearn.tile_stores import TileStoreWithLayer
+from rslearn.utils.feature import Feature
+from rslearn.utils.geometry import Projection, STGeometry, get_global_geometry
+
+logger = get_logger(__name__)
+
+
+class EuroCropsItem(Item):
+    """An item in the EuroCrops data source.
+
+    For simplicity, we have just one item per year, so each item combines all of the
+    country-level files for that year.
+    """
+
+    def __init__(self, name: str, geometry: STGeometry, zip_fnames: list[str]):
+        """Creates a new EuroCropsItem.
+
+        Args:
+            name: unique name of the item. It is just the year that this item
+                corresponds to.
+            geometry: the spatial and temporal extent of the item
+            zip_fnames: the filenames of the zip files that contain country-level crop
+                type data for this year.
+        """
+        super().__init__(name, geometry)
+        self.zip_fnames = zip_fnames
+
+    def serialize(self) -> dict:
+        """Serializes the item to a JSON-encodable dictionary."""
+        d = super().serialize()
+        d["zip_fnames"] = self.zip_fnames
+        return d
+
+    @staticmethod
+    def deserialize(d: dict) -> "EuroCropsItem":
+        """Deserializes an item from a JSON-decoded dictionary."""
+        item = super(EuroCropsItem, EuroCropsItem).deserialize(d)
+        return EuroCropsItem(
+            name=item.name, geometry=item.geometry, zip_fnames=d["zip_fnames"]
+        )
+
+
+class EuroCrops(DataSource[EuroCropsItem]):
+    """A data source for EuroCrops vector data (v11).
+
+    See https://zenodo.org/records/14094196 for details.
+
+    While the source data is split into country-level files, this data source uses one
+    item per year for simplicity. So each item corresponds to all of the country-level
+    files for that year.
+
+    Note that the RO_ny.zip file is not used.
+    """
+
+    BASE_URL = "https://zenodo.org/records/14094196/files/"
+    FILENAMES_BY_YEAR = {
+        2018: [
+            "FR_2018.zip",
+        ],
+        2019: [
+            "DK_2019.zip",
+        ],
+        2020: [
+            "ES_NA_2020.zip",
+            "FI_2020.zip",
+            "HR_2020.zip",
+            "NL_2020.zip",
+        ],
+        2021: [
+            "AT_2021.zip",
+            "BE_VLG_2021.zip",
+            "BE_WAL_2021.zip",
+            "EE_2021.zip",
+            "LT_2021.zip",
+            "LV_2021.zip",
+            "PT_2021.zip",
+            "SE_2021.zip",
+            "SI_2021.zip",
+            "SK_2021.zip",
+        ],
+        2023: [
+            "CZ_2023.zip",
+            "DE_BB_2023.zip",
+            "DE_LS_2021.zip",
+            "DE_NRW_2021.zip",
+            "ES_2023.zip",
+            "IE_2023.zip",
+        ],
+    }
+    TIMEOUT = timedelta(seconds=10)
+
+    @staticmethod
+    def from_config(config: VectorLayerConfig, ds_path: UPath) -> "EuroCrops":
+        """Creates a new EuroCrops instance from a configuration dictionary."""
+        if config.data_source is None:
+            raise ValueError("data_source is required")
+        return EuroCrops()
+
+    def _get_all_items(self) -> list[EuroCropsItem]:
+        """Get a list of all available items in the data source."""
+        items: list[EuroCropsItem] = []
+        for year, fnames in self.FILENAMES_BY_YEAR.items():
+            items.append(
+                EuroCropsItem(
+                    str(year),
+                    get_global_geometry(
+                        time_range=(
+                            datetime(year, 1, 1, tzinfo=UTC),
+                            datetime(year + 1, 1, 1, tzinfo=UTC),
+                        ),
+                    ),
+                    fnames,
+                )
+            )
+        return items
+
+    def get_items(
+        self, geometries: list[STGeometry], query_config: QueryConfig
+    ) -> list[list[list[EuroCropsItem]]]:
+        """Get a list of items in the data source intersecting the given geometries.
+
+        Args:
+            geometries: the spatiotemporal geometries
+            query_config: the query configuration
+
+        Returns:
+            List of groups of items that should be retrieved for each geometry.
+        """
+        wgs84_geometries = [
+            geometry.to_projection(WGS84_PROJECTION) for geometry in geometries
+        ]
+        all_items = self._get_all_items()
+        groups = []
+        for geometry in wgs84_geometries:
+            cur_groups = match_candidate_items_to_window(
+                geometry, all_items, query_config
+            )
+            groups.append(cur_groups)
+        return groups
+
+    def deserialize_item(self, serialized_item: Any) -> EuroCropsItem:
+        """Deserializes an item from JSON-decoded data."""
+        return EuroCropsItem.deserialize(serialized_item)
+
+    def _extract_features(self, fname: str) -> list[Feature]:
+        """Download the given zip file, extract shapefile, and return list of features."""
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Download the zip file.
+            url = self.BASE_URL + fname
+            logger.debug(f"Downloading zip file from {url}")
+            response = requests.get(
+                url,
+                stream=True,
+                timeout=self.TIMEOUT.total_seconds(),
+                allow_redirects=False,
+            )
+            response.raise_for_status()
+            zip_fname = os.path.join(tmp_dir, "data.zip")
+            with open(zip_fname, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+
+            # Extract all of the files and look for shapefile filename.
+            logger.debug(f"Extracting zip file {fname}")
+            with zipfile.ZipFile(zip_fname) as zip_f:
+                zip_f.extractall(path=tmp_dir)
+
+            # The shapefiles or geopackage files can appear at any level in the hierarchy.
+            # Most zip files contain one but some contain multiple (one per region).
+            shp_fnames = glob.glob(
+                "**/*.shp", root_dir=tmp_dir, recursive=True
+            ) + glob.glob("**/*.gpkg", root_dir=tmp_dir, recursive=True)
+            if len(shp_fnames) == 0:
+                tmp_dir_fnames = os.listdir(tmp_dir)
+                raise ValueError(
+                    f"expected {fname} to contain .shp file but none found (matches={shp_fnames}, ls={tmp_dir_fnames})"
+                )
+
+            # Load the features from the shapefile(s).
+            features = []
+            for shp_fname in shp_fnames:
+                logger.debug(f"Loading feature list from {shp_fname}")
+                with fiona.open(os.path.join(tmp_dir, shp_fname)) as src:
+                    crs = CRS.from_wkt(src.crs.to_wkt())
+                    # Normal GeoJSON should have coordinates in CRS coordinates, i.e. it
+                    # should be 1 projection unit/pixel.
+                    projection = Projection(crs, 1, 1)
+
+                    for feat in src:
+                        features.append(
+                            Feature.from_geojson(
+                                projection,
+                                {
+                                    "type": "Feature",
+                                    "geometry": dict(feat.geometry),
+                                    "properties": dict(feat.properties),
+                                },
+                            )
+                        )
+
+        return features
+
+    def ingest(
+        self,
+        tile_store: TileStoreWithLayer,
+        items: list[EuroCropsItem],
+        geometries: list[list[STGeometry]],
+    ) -> None:
+        """Ingest items into the given tile store.
+
+        Args:
+            tile_store: the tile store to ingest into
+            items: the items to ingest
+            geometries: a list of geometries needed for each item
+        """
+        for item in items:
+            if tile_store.is_vector_ready(item.name):
+                continue
+
+            # Get features across all shapefiles.
+            features: list[Feature] = []
+            for fname in item.zip_fnames:
+                logger.debug(f"Getting features from {fname} for item {item.name}")
+                features.extend(self._extract_features(fname))
+
+            logger.debug(f"Writing features for {item.name} to the tile store")
+            tile_store.write_vector(item.name, features)

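Based only on the code above, a short sketch of how the yearly items are organized: each item name is a year, mapping to the country-level zip files ingested for that year.

    from rslearn.data_sources.eurocrops import EuroCrops

    ds = EuroCrops()
    for item in ds._get_all_items():
        print(item.name, item.zip_fnames)
    # e.g. "2020" -> ["ES_NA_2020.zip", "FI_2020.zip", "HR_2020.zip", "NL_2020.zip"]
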
--- rslearn-0.0.4/rslearn/data_sources/local_files.py
+++ rslearn-0.0.6/rslearn/data_sources/local_files.py
@@ -232,6 +232,17 @@ class RasterImporter(Importer):
         projection = Projection(crs, x_resolution, y_resolution)
         geometry = STGeometry(projection, shp, None)
 
+        if geometry.is_too_large():
+            geometry = get_global_geometry(time_range=None)
+            logger.warning(
+                "Global geometry detected: this geometry will be matched against all "
+                "windows in the rslearn dataset. When using settings like "
+                "max_matches=1 and space_mode=MOSAIC, this may cause windows outside "
+                "the geometry's valid bounds to be materialized from the global raster "
+                "instead of a more appropriate source. Consider using COMPOSITE mode, "
+                "or increasing max_matches if this behavior is unintended."
+            )
+
         if spec.name:
             item_name = spec.name
         else:

--- rslearn-0.0.4/rslearn/data_sources/openstreetmap.py
+++ rslearn-0.0.6/rslearn/data_sources/openstreetmap.py
@@ -1,4 +1,4 @@
-"""Data source for
+"""Data source for OpenStreetMap vector features."""
 
 import json
 import shutil
@@ -392,7 +392,7 @@ class OpenStreetMap(DataSource[OsmItem]):
         bounds_fname: UPath,
         categories: dict[str, Filter],
     ):
-        """Initialize a new
+        """Initialize a new OpenStreetMap instance.
 
         Args:
             config: the configuration of this layer.
@@ -508,8 +508,6 @@ class OpenStreetMap(DataSource[OsmItem]):
             items: the items to ingest
             geometries: a list of geometries needed for each item
         """
-        item_names = [item.name for item in items]
-        item_names.sort()
         for cur_item, cur_geometries in zip(items, geometries):
            if tile_store.is_vector_ready(cur_item.name):
                continue

--- rslearn-0.0.4/rslearn/dataset/dataset.py
+++ rslearn-0.0.6/rslearn/dataset/dataset.py
@@ -8,6 +8,7 @@ from upath import UPath
 
 from rslearn.config import load_layer_config
 from rslearn.log_utils import get_logger
+from rslearn.template_params import substitute_env_vars_in_string
 from rslearn.tile_stores import TileStore, load_tile_store
 
 from .index import DatasetIndex
@@ -52,7 +53,9 @@ class Dataset:
 
         # Load dataset configuration.
         with (self.path / "config.json").open("r") as f:
-            config = json.load(f)
+            config_content = f.read()
+            config_content = substitute_env_vars_in_string(config_content)
+            config = json.loads(config_content)
         self.layers = {}
         for layer_name, d in config["layers"].items():
             # Layer names must not contain period, since we use period to

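With this change, a dataset's config.json gets the same ${VAR} substitution as the Lightning YAML configs, applied to the raw text before JSON parsing. A hedged sketch (the environment variable and config keys are made up for illustration):

    import json
    import os

    from rslearn.template_params import substitute_env_vars_in_string

    os.environ["TILE_STORE_ROOT"] = "/mnt/tiles"  # hypothetical variable
    raw = '{"layers": {}, "tile_store": {"root_dir": "${TILE_STORE_ROOT}"}}'  # hypothetical keys
    config = json.loads(substitute_env_vars_in_string(raw))
    assert config["tile_store"]["root_dir"] == "/mnt/tiles"
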
--- /dev/null
+++ rslearn-0.0.6/rslearn/models/copernicusfm.py
@@ -0,0 +1,216 @@
+"""Copernicus FM model."""
+
+import logging
+import math
+from enum import Enum
+
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from upath import UPath
+
+from .copernicusfm_src.model_vit import vit_base_patch16
+
+logger = logging.getLogger(__name__)
+
+
+class CopernicusFMModality(Enum):
+    """Modality for Copernicus FM."""
+
+    SENTINEL2_L2A = "sentinel2_l2a"
+    SENTINEL1 = "sentinel1"
+
+
+MODALITY_TO_WAVELENGTH_BANDWIDTHS: dict[str, dict[str, list]] = {
+    # https://github.com/zhu-xlab/Copernicus-FM/blob/main/Copernicus-Bench/src/configs/dataset/cobench_eurosat_s2.yaml
+    CopernicusFMModality.SENTINEL2_L2A.value: {
+        "band_names": [
+            "B01",
+            "B02",
+            "B03",
+            "B04",
+            "B05",
+            "B06",
+            "B07",
+            "B08",
+            "B8A",
+            "B09",
+            "B10",
+            "B11",
+            "B12",
+        ],
+        "band_wavelengths": [
+            440,
+            490,
+            560,
+            665,
+            705,
+            740,
+            783,
+            842,
+            860,
+            940,
+            1370,
+            1610,
+            2190,
+        ],
+        "band_bandwidths": [20, 65, 35, 30, 15, 15, 20, 115, 20, 20, 30, 90, 180],
+    },
+    # https://github.com/zhu-xlab/Copernicus-FM/blob/main/Copernicus-Bench/src/configs/dataset/cobench_eurosat_s1.yaml
+    CopernicusFMModality.SENTINEL1.value: {
+        "band_names": ["vv", "vh"],
+        "band_wavelengths": [50000000, 50000000],
+        "band_bandwidths": [1e9, 1e9],
+    },
+}
+
+
+class CopernicusFM(torch.nn.Module):
+    """Wrapper for Copernicus FM to ingest Masked Helios Sample."""
+
+    image_resolution = 224
+    patch_size = 16
+    input_mode = "spectral"
+    # Don't need this as band order is provided
+    supported_modalities = [
+        CopernicusFMModality.SENTINEL2_L2A.value,
+        CopernicusFMModality.SENTINEL1.value,
+    ]
+
+    def __init__(
+        self,
+        band_order: dict[str, list[str]],
+        load_directory: str | None,
+    ) -> None:
+        """Initialize the Copernicus FM wrapper.
+
+        Args:
+            band_order: The band order for each modality
+            load_directory: The directory to load from, if None no weights are loaded
+        """
+        super().__init__()
+
+        # global_pool=True so that we initialize the fc_norm layer
+        self.band_order = band_order
+        self.model = vit_base_patch16(num_classes=10, global_pool=True)
+        if load_directory is not None:
+            check_point = torch.load(
+                UPath(load_directory) / "CopernicusFM_ViT_base_varlang_e100.pth",
+                weights_only=True,
+            )
+            if "model" in check_point:
+                state_dict = check_point["model"]
+            else:
+                state_dict = check_point
+            self.model.load_state_dict(state_dict, strict=False)
+
+        # take MODALITY_TO_WAVELENGTH_BANDWIDTHS and rearrange it so that it has the same
+        # ordering as the Helios band orders, defined by Modality.band_order
+        self.modality_to_wavelength_bandwidths = {}
+        for modality in self.supported_modalities:
+            wavelength_bandwidths = MODALITY_TO_WAVELENGTH_BANDWIDTHS[modality]
+            wavelengths = []
+            bandwidths = []
+            modality_band_order = self.band_order.get(modality, None)
+            if modality_band_order is None:
+                logger.warning(
+                    f"Band order for modality {modality} not found in band_order dictionary, unable to use this modality unless specified"
+                )
+                continue
+            for b in modality_band_order:
+                cfm_idx = wavelength_bandwidths["band_names"].index(b)
+                wavelengths.append(wavelength_bandwidths["band_wavelengths"][cfm_idx])
+                bandwidths.append(wavelength_bandwidths["band_bandwidths"][cfm_idx])
+            self.modality_to_wavelength_bandwidths[modality] = {
+                "band_bandwidths": bandwidths,
+                "band_wavelengths": wavelengths,
+            }
+
+    def _resize_data(self, data: torch.Tensor) -> torch.Tensor:
+        """Process individual modality data.
+
+        Args:
+            data: Input tensor of shape [B, C, H, W]
+
+        Returns:
+            list of tensors of shape [B, C, H, W]
+        """
+        # Get original dimensions
+        original_height = data.shape[2]
+        new_height = self.patch_size if original_height == 1 else self.image_resolution
+        data = F.interpolate(
+            data,
+            size=(new_height, new_height),
+            mode="bilinear",
+            align_corners=False,
+        )
+        return data
+
+    def prepare_input(
+        self,
+        inputs: dict[str, torch.Tensor],
+    ) -> tuple[torch.Tensor, list[int], list[int]]:
+        """Prepare input for the CopernicusFM model from MaskedHeliosSample."""
+        wavelengths: list[int] = []
+        bandwidths: list[int] = []
+        all_processed_data: list[list[torch.Tensor]] = []
+        for modality in inputs.keys():
+            if modality not in self.supported_modalities:
+                logger.debug(
+                    f"Skipping modality {modality} as it is not in the supported "
+                    f"modalities list {self.supported_modalities}"
+                )
+                continue
+
+            data = inputs[modality]
+
+            if data is None:
+                continue
+
+            all_processed_data.append(self._resize_data(data))
+            wavelengths.extend(
+                self.modality_to_wavelength_bandwidths[modality]["band_wavelengths"]
+            )
+            bandwidths.extend(
+                self.modality_to_wavelength_bandwidths[modality]["band_bandwidths"]
+            )
+
+        concatenated_processed_data = torch.cat(all_processed_data, dim=1)
+        return concatenated_processed_data, wavelengths, bandwidths
+
+    def forward(
+        self,
+        inputs: list[dict[str, torch.Tensor]],
+    ) -> torch.Tensor:
+        """Forward pass through CopernicusFM model."""
+        batch_inputs = {
+            key: torch.stack([inp[key] for inp in inputs], dim=0)
+            for key in inputs[0].keys()
+        }
+        # Prepare input
+        data, wavelengths, bandwidths = self.prepare_input(batch_inputs)
+        meta = torch.full(
+            (1, 4), float("nan"), device=data.device
+        )  # [lon, lat, delta_time, patch_token_area], assume unknown
+        # "The embed tensor contains the encoded image features, which can be used for downstream tasks."
+        _, timestep_output = self.model(
+            data,
+            meta,
+            wavelengths,
+            bandwidths,
+            None,
+            self.input_mode,
+            self.patch_size,
+        )
+        # no norm, following
+        # https://github.com/zhu-xlab/Copernicus-FM/blob/main/Copernicus-Bench/src/foundation_models/CopernicusFM/models_dwv_seg.py
+        side = math.isqrt(timestep_output.shape[1])
+        output_features = rearrange(
+            timestep_output, "b (h w) c -> b c h w ", h=side, w=side
+        )
+        return [output_features]
+
+    def get_backbone_channels(self) -> list[tuple[int, int]]:
+        """Returns the output channels of this model when used as a backbone."""
+        # TODO: load this from a constant depending on the model size
+        return [(self.patch_size, 768)]

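A minimal usage sketch for the new wrapper, assuming a single Sentinel-2 L2A input with four bands; shapes follow the code above, and weights are skipped, so this is illustrative only:

    import torch

    from rslearn.models.copernicusfm import CopernicusFM

    model = CopernicusFM(
        band_order={"sentinel2_l2a": ["B02", "B03", "B04", "B08"]},
        load_directory=None,  # no pretrained checkpoint for this sketch
    )
    # One sample, C x H x W; forward() stacks the list into a batch.
    inputs = [{"sentinel2_l2a": torch.randn(4, 224, 224)}]
    features = model(inputs)
    print(features[0].shape)  # expect [1, 768, 14, 14] with patch_size=16
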
--- /dev/null
+++ rslearn-0.0.6/rslearn/models/copernicusfm_src/__init__.py
@@ -0,0 +1 @@
+# mypy: ignore-errors