rslearn 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,358 @@
+ """Data source for Google Satellite Embedding V1 dataset on AWS Open Data."""
+
+ import os
+ import tempfile
+ from collections.abc import Callable
+ from datetime import UTC, datetime
+ from typing import Any
+
+ import boto3
+ import numpy as np
+ import numpy.typing as npt
+ import pandas as pd
+ import rasterio
+ import rasterio.vrt
+ import shapely
+ import shapely.wkt
+ from botocore import UNSIGNED
+ from botocore.config import Config
+ from rasterio.enums import Resampling
+ from upath import UPath
+
+ import rslearn.data_sources.utils
+ from rslearn.const import WGS84_PROJECTION
+ from rslearn.data_sources.data_source import (
+     DataSourceContext,
+     Item,
+     QueryConfig,
+ )
+ from rslearn.data_sources.direct_materialize_data_source import (
+     DirectMaterializeDataSource,
+ )
+ from rslearn.utils.fsspec import join_upath
+ from rslearn.utils.geometry import PixelBounds, Projection, STGeometry
+ from rslearn.utils.grid_index import GridIndex
+
+ # Band names for the 64 embedding channels
+ BANDS = [f"A{idx:02d}" for idx in range(64)]
+
+ # S3 bucket configuration
+ BUCKET_NAME = "us-west-2.opendata.source.coop"
+ BUCKET_PREFIX = "tge-labs/aef/v1/annual"
+ INDEX_KEY = f"{BUCKET_PREFIX}/aef_index.csv"
+ HTTP_URL_BASE = f"https://s3.us-west-2.amazonaws.com/{BUCKET_NAME}"
+
+ # Grid index cell size for spatial queries
+ GRID_SIZE = 1.0
+
+
+ class GoogleSatelliteEmbeddingV1Item(Item):
+     """An item in the GoogleSatelliteEmbeddingV1 data source."""
+
+     def __init__(
+         self,
+         name: str,
+         geometry: STGeometry,
+         s3_path: str,
+     ) -> None:
+         """Creates a new GoogleSatelliteEmbeddingV1Item.
+
+         Args:
+             name: unique name of the item (the filename without extension)
+             geometry: the spatial and temporal extent of the item
+             s3_path: full S3 path to the TIFF file
+         """
+         super().__init__(name, geometry)
+         self.s3_path = s3_path
+
+     def serialize(self) -> dict:
+         """Serializes the item to a JSON-encodable dictionary."""
+         d = super().serialize()
+         d["s3_path"] = self.s3_path
+         return d
+
+     @staticmethod
+     def deserialize(d: dict) -> "GoogleSatelliteEmbeddingV1Item":
+         """Deserializes an item from a JSON-decoded dictionary."""
+         item = super(
+             GoogleSatelliteEmbeddingV1Item, GoogleSatelliteEmbeddingV1Item
+         ).deserialize(d)
+         return GoogleSatelliteEmbeddingV1Item(
+             name=item.name,
+             geometry=item.geometry,
+             s3_path=d["s3_path"],
+         )
+
+
+ class GoogleSatelliteEmbeddingV1(
+     DirectMaterializeDataSource[GoogleSatelliteEmbeddingV1Item]
+ ):
+     """Data source for Google Satellite Embedding V1 on AWS Open Data.
+
+     It consists of annual satellite embeddings at 10m resolution with 64 bands
+     (A00-A63). The data is stored as Cloud-Optimized GeoTIFFs organized by year and UTM
+     zone. Each file covers 8192x8192 pixels.
+
+     Available years: 2018-2024.
+
+     See https://registry.opendata.aws/aef-source/ for details.
+     """
+
+     def __init__(
+         self,
+         metadata_cache_dir: str,
+         apply_dequantization: bool = True,
+         context: DataSourceContext = DataSourceContext(),
+     ) -> None:
+         """Initialize a new GoogleSatelliteEmbeddingV1 instance.
+
+         Args:
+             metadata_cache_dir: directory to cache the index file.
+             apply_dequantization: whether to apply de-quantization to convert
+                 int8 values to float32. The raw data is quantized int8; the
+                 de-quantization maps values to [-1, 1] using the formula:
+                 ((values / 127.5) ** 2) * sign(values). The raw data has nodata value
+                 -128 while with dequantization the nodata value is -1.0. See
+                 https://source.coop/tge-labs/aef for details.
+             context: the data source context.
+         """
+         # We have a single asset containing all 64 bands. Here "image" is an arbitrary
+         # name, since DirectMaterializeDataSource requires an asset name.
+         super().__init__(asset_bands={"image": BANDS})
+
+         self.apply_dequantization = apply_dequantization
+
+         # Set up cache directory
+         if context.ds_path is not None:
+             self.metadata_cache_dir = join_upath(context.ds_path, metadata_cache_dir)
+         else:
+             self.metadata_cache_dir = UPath(metadata_cache_dir)
+         self.metadata_cache_dir.mkdir(parents=True, exist_ok=True)
+
+         # S3 client with anonymous access (only used for downloading index)
+         self.s3_client = boto3.client(
+             "s3",
+             config=Config(signature_version=UNSIGNED),
+             region_name="us-west-2",
+         )
+
+         # Lazy-loaded grid index
+         self._grid_index: GridIndex | None = None
+         self._items_by_name: dict[str, GoogleSatelliteEmbeddingV1Item] | None = None
+
+     def _read_index_csv(self) -> pd.DataFrame:
+         """Read the index CSV, downloading from S3 if not cached.
+
+         Returns:
+             DataFrame with WKT, path, and year columns.
+         """
+         cache_file = self.metadata_cache_dir / "aef_index.csv"
+         if not cache_file.exists():
+             response = self.s3_client.get_object(Bucket=BUCKET_NAME, Key=INDEX_KEY)
+             content = response["Body"].read()
+             with cache_file.open("wb") as f:
+                 f.write(content)
+
+         return pd.read_csv(
+             cache_file,
+             header=None,
+             usecols=[0, 2, 3],
+             names=["WKT", "path", "year"],
+         )
+
+     def _load_index(
+         self,
+     ) -> tuple[GridIndex, dict[str, GoogleSatelliteEmbeddingV1Item]]:
+         """Load the index file and build spatial index.
+
+         Returns:
+             Tuple of (grid_index, items_by_name dict).
+         """
+         if self._grid_index is not None and self._items_by_name is not None:
+             return self._grid_index, self._items_by_name
+
+         df = self._read_index_csv()
+
+         grid_index = GridIndex(GRID_SIZE)
+         items_by_name: dict[str, GoogleSatelliteEmbeddingV1Item] = {}
+
+         for _, row in df.iterrows():
+             shp = shapely.wkt.loads(row["WKT"])
+
+             year = int(row["year"])
+             time_range = (
+                 datetime(year, 1, 1, tzinfo=UTC),
+                 datetime(year, 12, 31, 23, 59, 59, tzinfo=UTC),
+             )
+
+             s3_path = row["path"]
+             name = s3_path.split("/")[-1].replace(".tiff", "")
+
+             geometry = STGeometry(WGS84_PROJECTION, shp, time_range)
+             item = GoogleSatelliteEmbeddingV1Item(
+                 name=name,
+                 geometry=geometry,
+                 s3_path=s3_path,
+             )
+
+             grid_index.insert(shp.bounds, item)
+             items_by_name[name] = item
+
+         self._grid_index = grid_index
+         self._items_by_name = items_by_name
+         return grid_index, items_by_name
+
+     # --- DataSource implementation ---
+
+     def get_items(
+         self, geometries: list[STGeometry], query_config: QueryConfig
+     ) -> list[list[list[GoogleSatelliteEmbeddingV1Item]]]:
+         """Get a list of items in the data source intersecting the given geometries."""
+         grid_index, _ = self._load_index()
+
+         wgs84_geometries = [
+             geometry.to_projection(WGS84_PROJECTION) for geometry in geometries
+         ]
+
+         groups = []
+         for geometry, wgs84_geometry in zip(geometries, wgs84_geometries):
+             cur_items = []
+             for item in grid_index.query(wgs84_geometry.shp.bounds):
+                 if not wgs84_geometry.shp.intersects(item.geometry.shp):
+                     continue
+                 # Check time range if specified
+                 if wgs84_geometry.time_range is not None:
+                     item_start, item_end = item.geometry.time_range
+                     query_start, query_end = wgs84_geometry.time_range
+                     if item_end < query_start or item_start > query_end:
+                         continue
+                 cur_items.append(item)
+
+             cur_items.sort(key=lambda item: item.geometry.time_range[0])
+
+             cur_groups: list[list[GoogleSatelliteEmbeddingV1Item]] = (
+                 rslearn.data_sources.utils.match_candidate_items_to_window(
+                     geometry, cur_items, query_config
+                 )
+             )
+             groups.append(cur_groups)
+
+         return groups
+
+     def get_item_by_name(self, name: str) -> GoogleSatelliteEmbeddingV1Item:
+         """Gets an item by name."""
+         _, items_by_name = self._load_index()
+         if name not in items_by_name:
+             raise ValueError(f"item {name} not found")
+         return items_by_name[name]
+
+     def deserialize_item(self, serialized_item: dict) -> GoogleSatelliteEmbeddingV1Item:
+         """Deserializes an item from JSON-decoded data."""
+         return GoogleSatelliteEmbeddingV1Item.deserialize(serialized_item)
+
+     def ingest(
+         self,
+         tile_store: Any,
+         items: list[GoogleSatelliteEmbeddingV1Item],
+         geometries: list[list[STGeometry]],
+     ) -> None:
+         """Ingest items into the given tile store.
+
+         Note: Each file is 2-3GB so this can be slow. Direct materialization via
+         read_raster or materialize is recommended for most use cases.
+
+         Args:
+             tile_store: the tile store to ingest into
+             items: the items to ingest
+             geometries: a list of geometries needed for each item
+         """
+         for item in items:
+             if tile_store.is_raster_ready(item.name, BANDS):
+                 continue
+
+             # Download the TIFF file directly to disk
+             key = item.s3_path.replace(f"s3://{BUCKET_NAME}/", "")
+             with tempfile.TemporaryDirectory() as tmp_dir:
+                 local_path = os.path.join(tmp_dir, f"{item.name}.tiff")
+                 self.s3_client.download_file(BUCKET_NAME, key, local_path)
+                 tile_store.write_raster_file(item.name, BANDS, UPath(local_path))
+
+     # --- DirectMaterializeDataSource implementation ---
+
+     def get_asset_url(self, item_name: str, asset_key: str) -> str:
+         """Get the HTTP URL to read the asset.
+
+         Returns a /vsicurl/ URL that rasterio can read directly over HTTP.
+         """
+         item = self.get_item_by_name(item_name)
+         # Convert s3://bucket/path to HTTP URL
+         key = item.s3_path.replace(f"s3://{BUCKET_NAME}/", "")
+         return f"/vsicurl/{HTTP_URL_BASE}/{key}"
+
+     def get_read_callback(
+         self, item_name: str, asset_key: str
+     ) -> Callable[[npt.NDArray[Any]], npt.NDArray[Any]] | None:
+         """Return a callback to apply de-quantization if enabled."""
+         if not self.apply_dequantization:
+             return None
+
+         def dequantize(data: npt.NDArray[Any]) -> npt.NDArray[np.float32]:
+             # Handle nodata (-128)
+             nodata_mask = data == -128
+             float_data = data.astype(np.float32)
+             # This is the dequantization formula recommended at https://source.coop/tge-labs/aef.
+             result = ((float_data / 127.5) ** 2) * np.sign(float_data)
+             # We make sure that NODATA is exactly -1.0 so user can handle it appropriately.
+             result[nodata_mask] = -1.0
+             return result
+
+         return dequantize
+
+     def read_raster(
+         self,
+         layer_name: str,
+         item_name: str,
+         bands: list[str],
+         projection: Projection,
+         bounds: PixelBounds,
+         resampling: Resampling = Resampling.bilinear,
+     ) -> npt.NDArray[Any]:
+         """Read raster data from the store.
+
+         Overrides base class to handle band selection (the base class reads all bands).
+         """
+         asset_url = self.get_asset_url(item_name, "image")
+
+         # Determine which band indices to read (1-indexed for rasterio)
+         if bands == BANDS:
+             band_indices = list(range(1, 65))
+         else:
+             band_indices = [BANDS.index(b) + 1 for b in bands]
+
+         # Construct the transform for the requested bounds
+         wanted_transform = rasterio.transform.Affine(
+             projection.x_resolution,
+             0,
+             bounds[0] * projection.x_resolution,
+             0,
+             projection.y_resolution,
+             bounds[1] * projection.y_resolution,
+         )
+
+         with rasterio.open(asset_url) as src:
+             with rasterio.vrt.WarpedVRT(
+                 src,
+                 crs=projection.crs,
+                 transform=wanted_transform,
+                 width=bounds[2] - bounds[0],
+                 height=bounds[3] - bounds[1],
+                 resampling=resampling,
+             ) as vrt:
+                 data = vrt.read(indexes=band_indices)
+
+         # Apply callback if dequantization is enabled
+         callback = self.get_read_callback(item_name, "image")
+         if callback is not None:
+             data = callback(data)
+
+         return data
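
As a standalone illustration of the de-quantization applied by get_read_callback above (a minimal sketch with made-up int8 values, independent of the rslearn classes in this diff):

import numpy as np

# Quantized int8 embedding values; -128 marks nodata in the raw data.
quantized = np.array([-128, -127, -64, 0, 64, 127], dtype=np.int8)

nodata_mask = quantized == -128
values = quantized.astype(np.float32)
# ((v / 127.5) ** 2) * sign(v) maps the quantized values into [-1, 1].
dequantized = ((values / 127.5) ** 2) * np.sign(values)
# Nodata is forced to exactly -1.0, matching the callback above.
dequantized[nodata_mask] = -1.0
print(dequantized)  # approximately [-1.0, -0.992, -0.252, 0.0, 0.252, 0.992]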
@@ -11,14 +11,16 @@ import rasterio.vrt
  from rasterio.enums import Resampling
  
  from rslearn.config import LayerConfig
- from rslearn.data_sources.data_source import DataSource, ItemType
+ from rslearn.data_sources.data_source import ItemLookupDataSource, ItemType
  from rslearn.dataset import Window
  from rslearn.dataset.materialize import RasterMaterializer
  from rslearn.tile_stores import TileStore, TileStoreWithLayer
  from rslearn.utils.geometry import PixelBounds, Projection
  
  
- class DirectMaterializeDataSource(DataSource[ItemType], TileStore, Generic[ItemType]):
+ class DirectMaterializeDataSource(
+     ItemLookupDataSource[ItemType], TileStore, Generic[ItemType]
+ ):
      """Base class for data sources that support direct materialization via TileStore.
  
      This class provides common TileStore functionality for data sources that can read
@@ -27,9 +29,10 @@ class DirectMaterializeDataSource(DataSource[ItemType], TileStore, Generic[ItemT
  
      Subclasses must implement:
      - get_asset_url(): Get the URL for an asset given item name and bands
-     - get_item_by_name(): Get an item by its name
  
      Subclasses may optionally override:
+     - get_item_by_name(): Inherited from ItemLookupDataSource. If also inheriting
+       from a class that provides it (e.g., StacDataSource), no override needed.
      - get_raster_bands(): By default, we assume that items have all assets. If
        items may have a subset of assets, override get_raster_bands to return
        the sets of bands available for that item.
@@ -77,20 +80,6 @@ class DirectMaterializeDataSource(DataSource[ItemType], TileStore, Generic[ItemT
          """
          raise NotImplementedError
  
-     def get_item_by_name(self, name: str) -> ItemType:
-         """Get an item by its name.
- 
-         Subclasses must implement this method, either directly or by inheriting from
-         a class that provides it (e.g., StacDataSource).
- 
-         Args:
-             name: the name of the item to get.
- 
-         Returns:
-             the item object.
-         """
-         raise NotImplementedError
- 
      # --- Optional hooks for subclasses ---
  
      def get_read_callback(
@@ -418,7 +418,8 @@ def match_candidate_items_to_window(
      )
  
      # Now apply space mode.
-     item_shps = []
+     acceptable_items = []
+     acceptable_item_shps = []
      for item in items:
          item_geom = item.geometry
          # We need to re-project items to the geometry projection for the spatial checks
@@ -430,14 +431,20 @@
              item_geom = geometry
          else:
              item_geom = item_geom.to_projection(geometry.projection)
-         item_shps.append(item_geom.shp)
+ 
+         if item_geom.shp.area == 0:
+             # Must have been an item that didn't quite match the window's spatial extent.
+             continue
+ 
+         acceptable_items.append(item)
+         acceptable_item_shps.append(item_geom.shp)
  
      # Dispatch to the appropriate space mode handler
      handler = space_mode_handlers.get(query_config.space_mode)
      if handler is None:
          raise ValueError(f"invalid space mode {query_config.space_mode}")
  
-     groups = handler(geometry, items, item_shps, query_config)
+     groups = handler(geometry, acceptable_items, acceptable_item_shps, query_config)
  
      # Enforce minimum matches if set.
      if len(groups) < query_config.min_matches:
@@ -15,7 +15,7 @@ from rslearn.dataset.window import (
      get_window_layer_dir,
  )
  from rslearn.log_utils import get_logger
- from rslearn.utils.fsspec import open_atomic
+ from rslearn.utils.fsspec import iter_nonhidden_subdirs, open_atomic
  from rslearn.utils.mp import star_imap_unordered
  
  from .storage import WindowStorage, WindowStorageFactory
@@ -77,8 +77,8 @@ class FileWindowStorage(WindowStorage):
          window_dirs = []
          if not groups:
              groups = []
-             for p in (self.path / "windows").iterdir():
-                 groups.append(p.name)
+             for group_dir in iter_nonhidden_subdirs(self.path / "windows"):
+                 groups.append(group_dir.name)
          for group in groups:
              group_dir = self.path / "windows" / group
              if not group_dir.exists():
@@ -86,16 +86,20 @@ class FileWindowStorage(WindowStorage):
                      f"Skipping group directory {group_dir} since it does not exist"
                  )
                  continue
+             if not group_dir.is_dir():
+                 logger.warning(
+                     f"Skipping group path {group_dir} since it is not a directory"
+                 )
+                 continue
              if names:
-                 cur_names = names
+                 for window_name in names:
+                     window_dir = group_dir / window_name
+                     if not window_dir.is_dir():
+                         continue
+                     window_dirs.append(window_dir)
              else:
-                 cur_names = []
-                 for p in group_dir.iterdir():
-                     cur_names.append(p.name)
- 
-             for window_name in cur_names:
-                 window_dir = group_dir / window_name
-                 window_dirs.append(window_dir)
+                 for window_dir in iter_nonhidden_subdirs(group_dir):
+                     window_dirs.append(window_dir)
  
          if workers == 0:
              windows = [load_window(self, window_dir) for window_dir in window_dirs]
@@ -162,7 +166,7 @@ class FileWindowStorage(WindowStorage):
              return []
  
          completed_layers = []
-         for layer_dir in layers_directory.iterdir():
+         for layer_dir in iter_nonhidden_subdirs(layers_directory):
              layer_name, group_idx = get_layer_and_group_from_dir_name(layer_dir.name)
              if not self.is_layer_completed(group, name, layer_name, group_idx):
                  continue
@@ -0,0 +1,74 @@
+ """Global pooling decoder for spatial feature maps."""
+
+ from typing import Any, Literal
+
+ import torch
+
+ from rslearn.train.model_context import ModelContext
+
+ from .component import FeatureMaps, FeatureVector, IntermediateComponent
+
+
+ class GlobalPool(IntermediateComponent):
+     """Apply global pooling to reduce spatial dimensions.
+
+     This component applies global average or max pooling over the spatial dimensions
+     of input feature maps. By default, it produces FeatureVector (BxC) suitable for
+     ClassificationHead or RegressionHead. When keep_spatial_dims=True, it produces
+     1x1 FeatureMaps suitable for EmbeddingHead.
+     """
+
+     def __init__(
+         self,
+         mode: Literal["mean", "max"] = "mean",
+         keep_spatial_dims: bool = False,
+     ) -> None:
+         """Create a new GlobalPool.
+
+         Args:
+             mode: the pooling mode, either "mean" for global average pooling or
+                 "max" for global max pooling. Defaults to "mean".
+             keep_spatial_dims: if True, returns FeatureMaps with 1x1 spatial dimensions.
+                 If False (default), returns FeatureVector (BxC). Defaults to False.
+         """
+         super().__init__()
+         if mode not in ("mean", "max"):
+             raise ValueError(f"mode must be 'mean' or 'max', got '{mode}'")
+         self.mode = mode
+         self.keep_spatial_dims = keep_spatial_dims
+
+     def forward(
+         self, intermediates: Any, context: ModelContext
+     ) -> FeatureMaps | FeatureVector:
+         """Apply global pooling on the feature maps.
+
+         Args:
+             intermediates: output from the previous model component, which must be
+                 a FeatureMaps.
+             context: the model context.
+
+         Returns:
+             If keep_spatial_dims=False (default): FeatureVector (BxC) suitable for
+             ClassificationHead or RegressionHead.
+             If keep_spatial_dims=True: FeatureMaps with 1x1 spatial dimensions suitable
+             for EmbeddingHead.
+         """
+         if not isinstance(intermediates, FeatureMaps):
+             raise ValueError("input to GlobalPool must be FeatureMaps")
+
+         pooled_features = []
+         for feat in intermediates.feature_maps:
+             # feat is BCHW
+             if self.mode == "mean":
+                 pooled = feat.mean(dim=(2, 3), keepdim=self.keep_spatial_dims)
+             else:
+                 pooled = torch.amax(feat, dim=(2, 3), keepdim=self.keep_spatial_dims)
+             pooled_features.append(pooled)
+
+         if self.keep_spatial_dims:
+             return FeatureMaps(pooled_features)
+         else:
+             if len(pooled_features) == 1:
+                 return FeatureVector(pooled_features[0])
+             else:
+                 return FeatureVector(torch.cat(pooled_features, dim=1))
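
To illustrate the tensor shapes GlobalPool produces, here is a standalone torch sketch over a random BCHW feature map; rslearn's FeatureMaps/FeatureVector wrappers and the ModelContext argument are omitted:

import torch

feat = torch.randn(2, 64, 8, 8)  # batch of 2, 64 channels, 8x8 spatial

# mode="mean", keep_spatial_dims=False: BxC output, wrapped as a FeatureVector.
mean_pooled = feat.mean(dim=(2, 3), keepdim=False)
print(mean_pooled.shape)  # torch.Size([2, 64])

# mode="max", keep_spatial_dims=True: BxCx1x1 output, wrapped as 1x1 FeatureMaps.
max_pooled = torch.amax(feat, dim=(2, 3), keepdim=True)
print(max_pooled.shape)  # torch.Size([2, 64, 1, 1])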
@@ -15,6 +15,8 @@ from upath import UPath
  from rslearn.const import WGS84_PROJECTION
  from rslearn.utils.feature import Feature
  from rslearn.utils.fsspec import (
+     iter_nonhidden_files,
+     iter_nonhidden_subdirs,
      join_upath,
      open_atomic,
      open_rasterio_upath_reader,
@@ -129,7 +131,7 @@ class DefaultTileStore(TileStore):
              ValueError: if no file is found.
          """
          raster_dir = self._get_raster_dir(layer_name, item_name, bands)
-         for fname in raster_dir.iterdir():
+         for fname in iter_nonhidden_files(raster_dir):
              # Ignore completed sentinel files, bands files, as well as temporary files created by
              # open_atomic (in case this tile store is on local filesystem).
              if fname.name == COMPLETED_FNAME:
@@ -175,7 +177,7 @@ class DefaultTileStore(TileStore):
              return []
  
          bands: list[list[str]] = []
-         for raster_dir in item_dir.iterdir():
+         for raster_dir in iter_nonhidden_subdirs(item_dir):
              if not (raster_dir / BANDS_FNAME).exists():
                  # This is likely a legacy directory where the bands are only encoded in
                  # the directory name, so we have to rely on that.
@@ -108,10 +108,10 @@ class RslearnDataModule(L.LightningDataModule):
          self.use_in_memory_all_crops_dataset = use_in_memory_all_crops_dataset
          self.index_mode = index_mode
          self.split_configs = {
-             "train": default_config.update(train_config),
-             "val": default_config.update(val_config),
-             "test": default_config.update(test_config),
-             "predict": default_config.update(predict_config),
+             "train": SplitConfig.merge_and_validate([default_config, train_config]),
+             "val": SplitConfig.merge_and_validate([default_config, val_config]),
+             "test": SplitConfig.merge_and_validate([default_config, test_config]),
+             "predict": SplitConfig.merge_and_validate([default_config, predict_config]),
          }
  
      def setup(
@@ -141,7 +141,7 @@ class RslearnDataModule(L.LightningDataModule):
              task=self.task,
              workers=self.init_workers,
              name=self.name,
-             fix_patch_pick=(split != "train"),
+             fix_crop_pick=(split != "train"),
              index_mode=self.index_mode,
          )
          logger.info(f"got {len(dataset)} examples in split {split}")
@@ -203,13 +203,16 @@ class RslearnDataModule(L.LightningDataModule):
          # Enable persistent workers unless we are using main process.
          persistent_workers = self.num_workers > 0
  
-         # If using all patches, limit number of workers to the number of windows.
+         # If using all crops, limit number of workers to the number of windows.
          # Otherwise it has to distribute the same window to different workers which can
          # cause issues for RslearnWriter.
          # If the number of windows is 0, then we can set positive number of workers
          # since they won't yield anything anyway.
          num_workers = self.num_workers
-         if split_config.load_all_crops and len(dataset.get_dataset_examples()) > 0:
+         if (
+             split_config.get_load_all_crops()
+             and len(dataset.get_dataset_examples()) > 0
+         ):
              num_workers = min(num_workers, len(dataset.get_dataset_examples()))
  
          kwargs: dict[str, Any] = dict(