rslearn 0.0.25__py3-none-any.whl → 0.0.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/config/dataset.py +30 -23
- rslearn/data_sources/__init__.py +2 -0
- rslearn/data_sources/aws_landsat.py +44 -161
- rslearn/data_sources/aws_open_data.py +2 -4
- rslearn/data_sources/aws_sentinel1.py +1 -3
- rslearn/data_sources/aws_sentinel2_element84.py +54 -165
- rslearn/data_sources/climate_data_store.py +1 -3
- rslearn/data_sources/copernicus.py +1 -2
- rslearn/data_sources/data_source.py +1 -1
- rslearn/data_sources/direct_materialize_data_source.py +336 -0
- rslearn/data_sources/earthdaily.py +52 -155
- rslearn/data_sources/earthdatahub.py +425 -0
- rslearn/data_sources/eurocrops.py +1 -2
- rslearn/data_sources/gcp_public_data.py +1 -2
- rslearn/data_sources/google_earth_engine.py +1 -2
- rslearn/data_sources/hf_srtm.py +595 -0
- rslearn/data_sources/local_files.py +3 -3
- rslearn/data_sources/openstreetmap.py +1 -1
- rslearn/data_sources/planet.py +1 -2
- rslearn/data_sources/planet_basemap.py +1 -2
- rslearn/data_sources/planetary_computer.py +183 -186
- rslearn/data_sources/soilgrids.py +3 -3
- rslearn/data_sources/stac.py +1 -2
- rslearn/data_sources/usda_cdl.py +1 -3
- rslearn/data_sources/usgs_landsat.py +7 -254
- rslearn/data_sources/utils.py +204 -64
- rslearn/data_sources/worldcereal.py +1 -1
- rslearn/data_sources/worldcover.py +1 -1
- rslearn/data_sources/worldpop.py +1 -1
- rslearn/data_sources/xyz_tiles.py +5 -9
- rslearn/dataset/materialize.py +5 -1
- rslearn/models/clay/clay.py +3 -3
- rslearn/models/concatenate_features.py +6 -1
- rslearn/models/detr/detr.py +4 -1
- rslearn/models/dinov3.py +0 -1
- rslearn/models/olmoearth_pretrain/model.py +3 -1
- rslearn/models/pooling_decoder.py +1 -1
- rslearn/models/prithvi.py +0 -1
- rslearn/models/simple_time_series.py +97 -35
- rslearn/train/{all_patches_dataset.py → all_crops_dataset.py} +120 -117
- rslearn/train/data_module.py +32 -27
- rslearn/train/dataset.py +260 -117
- rslearn/train/dataset_index.py +156 -0
- rslearn/train/lightning_module.py +1 -1
- rslearn/train/model_context.py +19 -3
- rslearn/train/prediction_writer.py +69 -41
- rslearn/train/tasks/classification.py +1 -1
- rslearn/train/tasks/detection.py +5 -5
- rslearn/train/tasks/per_pixel_regression.py +13 -13
- rslearn/train/tasks/regression.py +1 -1
- rslearn/train/tasks/segmentation.py +26 -13
- rslearn/train/transforms/concatenate.py +17 -27
- rslearn/train/transforms/crop.py +8 -19
- rslearn/train/transforms/flip.py +4 -10
- rslearn/train/transforms/mask.py +9 -15
- rslearn/train/transforms/normalize.py +31 -82
- rslearn/train/transforms/pad.py +7 -13
- rslearn/train/transforms/resize.py +5 -22
- rslearn/train/transforms/select_bands.py +16 -36
- rslearn/train/transforms/sentinel1.py +4 -16
- rslearn/utils/__init__.py +2 -0
- rslearn/utils/geometry.py +21 -0
- rslearn/utils/m2m_api.py +251 -0
- rslearn/utils/retry_session.py +43 -0
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/METADATA +6 -3
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/RECORD +71 -66
- rslearn/data_sources/earthdata_srtm.py +0 -282
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/WHEEL +0 -0
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/licenses/NOTICE +0 -0
- {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/top_level.txt +0 -0
rslearn/train/{all_patches_dataset.py → all_crops_dataset.py}
RENAMED

@@ -1,4 +1,4 @@
-"""Wrapper around ModelDataset to load all patches in a window."""
+"""Wrapper around ModelDataset to load all crops in a window."""
 
 import itertools
 from collections.abc import Iterable, Iterator
@@ -14,70 +14,78 @@ from rslearn.train.model_context import RasterImage, SampleMetadata
 from rslearn.utils.geometry import PixelBounds, STGeometry
 
 
-def get_window_patch_options(
-    patch_size: tuple[int, int],
+def get_window_crop_options(
+    crop_size: tuple[int, int],
     overlap_size: tuple[int, int],
     bounds: PixelBounds,
 ) -> list[PixelBounds]:
-    """Get the bounds of each input patch within the window bounds.
+    """Get the bounds of each input crop within the window bounds.
 
-    This is used when running inference on all patches of a large window, to
-    compute the position of each patch.
+    This is used when running inference on all crops of a large window, to
+    compute the position of each crop.
 
     Args:
-        patch_size: the size of the patches to extract.
-        overlap_size: the size of the overlap between patches.
-        bounds: the window bounds to divide up into smaller patches.
+        crop_size: the size of the crops to extract.
+        overlap_size: the size of the overlap between crops.
+        bounds: the window bounds to divide up into smaller crops.
 
     Returns:
-        a list of patch bounds within the overall bounds. The rightmost and
-        bottommost patches may extend beyond the provided bounds.
+        a list of crop bounds within the overall bounds. The rightmost and
+        bottommost crops may extend beyond the provided bounds.
     """
-    # We stride the
-    #
-    #
-    #
+    # We stride the crops by (crop_size - overlap_size) until the last crop.
+    # The first crop always starts at bounds[0]/bounds[1]. It's okay if it extends
+    # beyond the window bounds since pad_slice_protect pads the tensors.
+    # We handle the last crop with a special case to ensure it does not exceed the
+    # window bounds. Instead, it may overlap the previous crop.
+    # Here is a simple 1D example:
+    # - Suppose bounds is [0, 15] with crop_size=8, overlap_size=2
+    # - Then the first crop should be [0, 8] (from first crop special case)
+    # - There will only be one crop in the middle, [6, 14]
+    # - And the last crop will be at [7, 15]
+    # - Note that, if the bounds was [0, 14], we will only have the first/last crop
+    #   special cases with no crops in the middle from the range(...).
     cols = [bounds[0]] + list(
         range(
-            bounds[0] + patch_size[0] - overlap_size[0],
-            bounds[2] - patch_size[0],
-            patch_size[0] - overlap_size[0],
+            bounds[0] + crop_size[0] - overlap_size[0],
+            bounds[2] - crop_size[0],
+            crop_size[0] - overlap_size[0],
         )
     )
     rows = [bounds[1]] + list(
         range(
-            bounds[1] + patch_size[1] - overlap_size[1],
-            bounds[3] - patch_size[1],
-            patch_size[1] - overlap_size[1],
+            bounds[1] + crop_size[1] - overlap_size[1],
+            bounds[3] - crop_size[1],
+            crop_size[1] - overlap_size[1],
        )
    )
-    # Add last patches only if the input is larger than one patch.
-    if bounds[2] - patch_size[0] > bounds[0]:
-        cols.append(bounds[2] - patch_size[0])
-    if bounds[3] - patch_size[1] > bounds[1]:
-        rows.append(bounds[3] - patch_size[1])
+    # Add last crops only if the input is larger than one crop.
+    if bounds[2] - crop_size[0] > bounds[0]:
+        cols.append(bounds[2] - crop_size[0])
+    if bounds[3] - crop_size[1] > bounds[1]:
+        rows.append(bounds[3] - crop_size[1])
 
-    patch_bounds: list[PixelBounds] = []
+    crop_bounds: list[PixelBounds] = []
     for col in cols:
         for row in rows:
-            patch_bounds.append((col, row, col + patch_size[0], row + patch_size[1]))
-    return patch_bounds
+            crop_bounds.append((col, row, col + crop_size[0], row + crop_size[1]))
+    return crop_bounds
 
 
 def pad_slice_protect(
     raw_inputs: dict[str, Any],
     passthrough_inputs: dict[str, Any],
-    patch_size: tuple[int, int],
+    crop_size: tuple[int, int],
     inputs: dict[str, DataInput],
 ) -> tuple[dict[str, Any], dict[str, Any]]:
-    """Pad tensors in-place by patch size to protect slicing near right/bottom edges.
+    """Pad tensors in-place by crop size to protect slicing near right/bottom edges.
 
     The padding is scaled based on each input's resolution_factor.
 
     Args:
         raw_inputs: the raw inputs to pad.
         passthrough_inputs: the passthrough inputs to pad.
-        patch_size: the size of the patches to extract (at window resolution).
+        crop_size: the size of the crops to extract (at window resolution).
         inputs: the DataInput definitions, used to get resolution_factor per input.
 
     Returns:
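The striding above reduces to an identical 1D start computation per axis. Below is a minimal standalone sketch of that logic, reproducing the 1D example from the new comments; the names `crop_starts` and `crop_options` are illustrative, not part of rslearn, and `PixelBounds` is approximated as a plain `(x0, y0, x1, y1)` tuple.

```python
def crop_starts(lo: int, hi: int, crop: int, overlap: int) -> list[int]:
    # First crop is pinned to lo; interior crops are strided by (crop - overlap);
    # the last crop is pinned to hi - crop so it stays inside the bounds.
    starts = [lo] + list(range(lo + crop - overlap, hi - crop, crop - overlap))
    if hi - crop > lo:
        starts.append(hi - crop)
    return starts


def crop_options(
    crop_size: tuple[int, int],
    overlap_size: tuple[int, int],
    bounds: tuple[int, int, int, int],
) -> list[tuple[int, int, int, int]]:
    cols = crop_starts(bounds[0], bounds[2], crop_size[0], overlap_size[0])
    rows = crop_starts(bounds[1], bounds[3], crop_size[1], overlap_size[1])
    return [(c, r, c + crop_size[0], r + crop_size[1]) for c in cols for r in rows]


# The 1D example from the new comments: bounds [0, 15], crop_size=8, overlap_size=2.
assert crop_starts(0, 15, 8, 2) == [0, 6, 7]  # crops [0,8], [6,14], [7,15]
# With bounds [0, 14], only the first/last special cases remain.
assert crop_starts(0, 14, 8, 2) == [0, 6]
```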
@@ -91,8 +99,8 @@ def pad_slice_protect(
         rf = inputs[input_name].resolution_factor
         scale = rf.numerator / rf.denominator
         # Scale the padding amount
-        scaled_pad_x = int(patch_size[0] * scale)
-        scaled_pad_y = int(patch_size[1] * scale)
+        scaled_pad_x = int(crop_size[0] * scale)
+        scaled_pad_y = int(crop_size[1] * scale)
         d[input_name] = torch.nn.functional.pad(
             value, pad=(0, scaled_pad_x, 0, scaled_pad_y)
         )
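To see what the resolution_factor scaling does here: an input stored at half the window resolution should be padded by half as many pixels. A hedged sketch follows; the `Fraction`-valued `resolution_factor` is an assumption based on the `numerator`/`denominator` access above.

```python
from fractions import Fraction

import torch

crop_size = (32, 32)  # at window resolution
rf = Fraction(1, 2)  # hypothetical resolution_factor for one input
scale = rf.numerator / rf.denominator
scaled_pad_x = int(crop_size[0] * scale)  # 16
scaled_pad_y = int(crop_size[1] * scale)  # 16

# Pad only on the right and bottom, matching pad=(0, x, 0, y) above.
value = torch.zeros(4, 64, 64)  # (channels, height, width)
padded = torch.nn.functional.pad(value, pad=(0, scaled_pad_x, 0, scaled_pad_y))
assert tuple(padded.shape) == (4, 80, 80)
```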
@@ -123,12 +131,12 @@ def crop_tensor_or_rasterimage(
     )
 
 
-class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
-    """This wraps a ModelDataset to iterate over all patches in that dataset.
+class IterableAllCropsDataset(torch.utils.data.IterableDataset):
+    """This wraps a ModelDataset to iterate over all crops in that dataset.
 
-    This should be used when SplitConfig.load_all_patches is enabled. The ModelDataset
-    is configured with no patch size (load entire windows), and the dataset is wrapped
-    in an AllPatchesDataset.
+    This should be used when SplitConfig.load_all_crops is enabled. The ModelDataset
+    is configured with no crop size (load entire windows), and the dataset is wrapped
+    in an AllCropsDataset.
 
     Similar to DistributedSampler, we add extra samples at each rank to ensure
     consistent number of batches across all ranks.
@@ -137,29 +145,27 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
     def __init__(
         self,
         dataset: ModelDataset,
-        patch_size: tuple[int, int],
-        overlap_ratio: float = 0.0,
+        crop_size: tuple[int, int],
+        overlap_pixels: int = 0,
         rank: int = 0,
         world_size: int = 1,
     ):
-        """Create a new IterableAllPatchesDataset.
+        """Create a new IterableAllCropsDataset.
 
         Args:
             dataset: the ModelDataset to wrap.
-            patch_size: the size of the patches to extract.
-            overlap_ratio: the ratio of overlap between adjacent patches. Note that
-                the right/bottom-most patches may still overlap since we ensure that
-                all patches are contained in the window bounds.
+            crop_size: the size of the crops to extract.
+            overlap_pixels: the number of pixels shared between adjacent crops. Note
+                that the right/bottom-most crops may still overlap with other crops even
+                if overlap_pixels=0 since we ensure that all crops are contained in the
+                window bounds.
             rank: the global rank of this train worker process.
             world_size: the total number of train worker processes.
         """
         super().__init__()
         self.dataset = dataset
-        self.patch_size = patch_size
-        self.overlap_size = (
-            round(self.patch_size[0] * overlap_ratio),
-            round(self.patch_size[1] * overlap_ratio),
-        )
+        self.crop_size = crop_size
+        self.overlap_size = (overlap_pixels, overlap_pixels)
         self.rank = rank
         self.world_size = world_size
         self.windows = self.dataset.get_dataset_examples()
@@ -173,17 +179,17 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
         """
         self.dataset.set_name(name)
 
-    def get_window_num_patches(self, bounds: PixelBounds) -> int:
-        """Get the number of patches for these bounds.
+    def get_window_num_crops(self, bounds: PixelBounds) -> int:
+        """Get the number of crops for these bounds.
 
-        This corresponds to the length of the list returned by get_window_patch_options.
+        This corresponds to the length of the list returned by get_window_crop_options.
         """
         num_cols = (
             len(
                 range(
                     bounds[0],
-                    bounds[2] - self.patch_size[0],
-                    self.patch_size[0] - self.overlap_size[0],
+                    bounds[2] - self.crop_size[0],
+                    self.crop_size[0] - self.overlap_size[0],
                 )
             )
             + 1
@@ -192,8 +198,8 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
             len(
                 range(
                     bounds[1],
-                    bounds[3] - self.patch_size[1],
-                    self.patch_size[1] - self.overlap_size[1],
+                    bounds[3] - self.crop_size[1],
+                    self.crop_size[1] - self.overlap_size[1],
                 )
             )
             + 1
@@ -235,14 +241,14 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
         ]
 
         # Now compute the maximum number of samples across workers.
-        max_num_patches = 0
+        max_num_crops = 0
         for worker_windows in windows_by_worker:
-            worker_num_patches = 0
+            worker_num_crops = 0
             for window_id in worker_windows:
-                worker_num_patches += self.get_window_num_patches(
+                worker_num_crops += self.get_window_num_crops(
                     self.windows[window_id].bounds
                 )
-            max_num_patches = max(max_num_patches, worker_num_patches)
+            max_num_crops = max(max_num_crops, worker_num_crops)
 
         # Each worker needs at least one window, otherwise it won't be able to pad.
         # Unless there are zero windows total, which is fine.
@@ -252,17 +258,17 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
         # window in the end.
         # So now we raise an error instead, and require the number of workers to be
         # less than the number of windows.
-        if len(windows_by_worker[global_worker_id]) == 0 and max_num_patches > 0:
+        if len(windows_by_worker[global_worker_id]) == 0 and max_num_crops > 0:
             raise ValueError(
                 f"the number of workers {global_num_workers} must be <= the number of windows {len(self.windows)}"
             )
 
-        return (windows_by_worker[global_worker_id], max_num_patches)
+        return (windows_by_worker[global_worker_id], max_num_crops)
 
     def __iter__(
         self,
     ) -> Iterator[tuple[dict[str, Any], dict[str, Any], SampleMetadata]]:
-        """Iterate over all patches in each element of the underlying ModelDataset."""
+        """Iterate over all crops in each element of the underlying ModelDataset."""
         # Iterate over the window IDs until we have returned enough samples.
        window_ids, num_samples_needed = self._get_worker_iteration_data()
        num_samples_returned = 0
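The max_num_crops bookkeeping above is what keeps ranks in sync: every worker yields exactly the maximum per-worker crop count, cycling through its own windows again when it has fewer. A hedged sketch with hypothetical numbers; the window-to-worker assignment shown is an assumption, and only the max computation mirrors the code above.

```python
# window_id -> crop count, as get_window_num_crops would report (hypothetical).
num_crops_per_window = {0: 9, 1: 4, 2: 9, 3: 1}

# Hypothetical assignment of window IDs to 3 global workers.
windows_by_worker = [[0, 3], [1], [2]]

max_num_crops = 0
for worker_windows in windows_by_worker:
    worker_num_crops = sum(num_crops_per_window[w] for w in worker_windows)
    max_num_crops = max(max_num_crops, worker_num_crops)

# Every worker then yields 10 samples: worker 0 has exactly 10 crops, while
# workers 1 and 2 pad up to 10 by repeating crops from their own windows.
assert max_num_crops == 10
```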
@@ -272,32 +278,32 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
             raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(
                 window_id
             )
-            bounds = metadata.patch_bounds
+            bounds = metadata.crop_bounds
 
-            # For simplicity, pad tensors by patch size to ensure that any patch bounds
+            # For simplicity, pad tensors by crop size to ensure that any crop bounds
             # extending outside the window bounds will not have issues when we slice
             # the tensors later. Padding is scaled per-input based on resolution_factor.
             pad_slice_protect(
-                raw_inputs, passthrough_inputs, self.patch_size, self.inputs
+                raw_inputs, passthrough_inputs, self.crop_size, self.inputs
             )
 
-            # Now iterate over the patches and extract/yield them.
+            # Now iterate over the crops and extract/yield them.
             # Note that, in case user is leveraging RslearnWriter, it is important that
-            # the patch_idx be increasing (as we iterate) within one window.
-            patches = get_window_patch_options(
-                self.patch_size, self.overlap_size, bounds
+            # the crop_idx be increasing (as we iterate) within one window.
+            crops = get_window_crop_options(
+                self.crop_size, self.overlap_size, bounds
             )
-            for patch_idx, patch_bounds in enumerate(patches):
+            for crop_idx, crop_bounds in enumerate(crops):
                 cur_geom = STGeometry(
-                    metadata.projection, shapely.box(*patch_bounds), None
+                    metadata.projection, shapely.box(*crop_bounds), None
                 )
                 start_offset = (
-                    patch_bounds[0] - bounds[0],
-                    patch_bounds[1] - bounds[1],
+                    crop_bounds[0] - bounds[0],
+                    crop_bounds[1] - bounds[1],
                 )
                 end_offset = (
-                    patch_bounds[2] - bounds[0],
-                    patch_bounds[3] - bounds[1],
+                    crop_bounds[2] - bounds[0],
+                    crop_bounds[3] - bounds[1],
                 )
 
                 # Define a helper function to handle each input dict.
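The offset math in this hunk rebases global crop bounds to the window origin before slicing. A small worked example with hypothetical numbers:

```python
bounds = (100, 200, 1124, 1224)  # a 1024x1024 window in global pixel coordinates
crop_bounds = (868, 968, 1124, 1224)  # the last 256x256 crop, pinned to the edge

start_offset = (crop_bounds[0] - bounds[0], crop_bounds[1] - bounds[1])
end_offset = (crop_bounds[2] - bounds[0], crop_bounds[3] - bounds[1])
assert start_offset == (768, 768)
assert end_offset == (1024, 1024)

# When a window is smaller than crop_size, end_offset can exceed the tensor
# extent; pad_slice_protect has already padded by one crop size to cover that.
```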
@@ -339,9 +345,9 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
                 # Adjust the metadata as well.
                 cur_metadata = replace(
                     metadata,
-                    patch_bounds=patch_bounds,
-                    patch_idx=patch_idx,
-                    num_patches_in_window=len(patches),
+                    crop_bounds=crop_bounds,
+                    crop_idx=crop_idx,
+                    num_crops_in_window=len(crops),
                 )
 
                 # Now we can compute input and target dicts via the task.
@@ -369,37 +375,34 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
         return self.dataset.get_dataset_examples()
 
 
-class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
-    """This wraps a ModelDataset to iterate over all patches in that dataset.
+class InMemoryAllCropsDataset(torch.utils.data.Dataset):
+    """This wraps a ModelDataset to iterate over all crops in that dataset.
 
-    This should be used when SplitConfig.load_all_patches is enabled.
+    This should be used when SplitConfig.load_all_crops is enabled.
 
-    This is a simpler version of IterableAllPatchesDataset that caches all windows in memory.
+    This is a simpler version of IterableAllCropsDataset that caches all windows in memory.
     This is useful for small datasets that fit in memory.
     """
 
     def __init__(
         self,
         dataset: ModelDataset,
-        patch_size: tuple[int, int],
-        overlap_ratio: float = 0.0,
+        crop_size: tuple[int, int],
+        overlap_pixels: int = 0,
     ):
-        """Create a new InMemoryAllPatchesDataset.
+        """Create a new InMemoryAllCropsDataset.
 
         Args:
             dataset: the ModelDataset to wrap.
-            patch_size: the size of the patches to extract.
-            overlap_ratio: the ratio of overlap between adjacent patches. Note that
-                the right/bottom-most patches may still overlap since we ensure that
-                all patches are contained in the window bounds.
+            crop_size: the size of the crops to extract.
+            overlap_pixels: the number of pixels shared between adjacent crops. Note
+                that the right/bottom-most crops may still overlap since we ensure that
+                all crops are contained in the window bounds.
         """
         super().__init__()
         self.dataset = dataset
-        self.patch_size = patch_size
-        self.overlap_size = (
-            round(self.patch_size[0] * overlap_ratio),
-            round(self.patch_size[1] * overlap_ratio),
-        )
+        self.crop_size = crop_size
+        self.overlap_size = (overlap_pixels, overlap_pixels)
         self.windows = self.dataset.get_dataset_examples()
         self.inputs = dataset.inputs
         self.window_cache: dict[
@@ -407,23 +410,23 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
         ] = {}
 
         # Precompute the batch boundaries for each window
-        self.patches = []
+        self.crops = []
         for window_id, window in enumerate(self.windows):
-            window_patch_bounds = get_window_patch_options(
-                self.patch_size, self.overlap_size, window.bounds
+            window_crop_bounds = get_window_crop_options(
+                self.crop_size, self.overlap_size, window.bounds
             )
-            for i, patch_bound in enumerate(window_patch_bounds):
-                self.patches.append((window_id, patch_bound, (i, len(window_patch_bounds))))
+            for i, crop_bound in enumerate(window_crop_bounds):
+                self.crops.append((window_id, crop_bound, (i, len(window_crop_bounds))))
 
     def get_raw_inputs(
         self, index: int
     ) -> tuple[dict[str, Any], dict[str, Any], SampleMetadata]:
-        """Get the raw inputs for a single patch. Retrieve from cache if possible.
+        """Get the raw inputs for a single crop. Retrieve from cache if possible.
 
-        Also crops/pads the tensors by patch size to protect slicing near right/bottom edges.
+        Also crops/pads the tensors by crop size to protect slicing near right/bottom edges.
 
         Args:
-            index: the index of the patch.
+            index: the index of the crop.
 
         Returns:
             a tuple of (raw_inputs, passthrough_inputs, metadata).
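The precomputed self.crops list above flattens every (window, crop) pair into one indexable sequence, so __getitem__ can address any crop in any window by a single integer. A hedged sketch, reusing the illustrative crop_options helper from the earlier sketch; the window bounds are hypothetical.

```python
window_bounds = [(0, 0, 15, 15), (0, 0, 8, 8)]  # hypothetical windows

crops = []
for window_id, bounds in enumerate(window_bounds):
    options = crop_options((8, 8), (2, 2), bounds)
    for i, crop_bound in enumerate(options):
        crops.append((window_id, crop_bound, (i, len(options))))

# Window 0 tiles into 3x3 crops (starts [0, 6, 7] per axis); window 1 is a
# single crop, so __len__ would report 10 here.
assert len(crops) == 10
window_id, crop_bound, (crop_idx, num_crops) = crops[0]
assert (window_id, crop_bound, crop_idx, num_crops) == (0, (0, 0, 8, 8), 0, 9)
```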
@@ -432,7 +435,7 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
             return self.window_cache[index]
 
         raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(index)
-        pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size, self.inputs)
+        pad_slice_protect(raw_inputs, passthrough_inputs, self.crop_size, self.inputs)
 
         self.window_cache[index] = (raw_inputs, passthrough_inputs, metadata)
         return self.window_cache[index]
@@ -476,20 +479,20 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
         return cropped
 
     def __len__(self) -> int:
-        """Return the total number of patches in the dataset."""
-        return len(self.patches)
+        """Return the total number of crops in the dataset."""
+        return len(self.crops)
 
     def __getitem__(
         self, index: int
     ) -> tuple[dict[str, Any], dict[str, Any], SampleMetadata]:
-        """Return (input_dict, target_dict, metadata) for a single flattened patch."""
-        (window_id, patch_bounds, (patch_idx, num_patches)) = self.patches[index]
+        """Return (input_dict, target_dict, metadata) for a single flattened crop."""
+        (window_id, crop_bounds, (crop_idx, num_crops)) = self.crops[index]
         raw_inputs, passthrough_inputs, metadata = self.get_raw_inputs(window_id)
-        bounds = metadata.patch_bounds
+        bounds = metadata.crop_bounds
 
-        cur_geom = STGeometry(metadata.projection, shapely.box(*patch_bounds), None)
-        start_offset = (patch_bounds[0] - bounds[0], patch_bounds[1] - bounds[1])
-        end_offset = (patch_bounds[2] - bounds[0], patch_bounds[3] - bounds[1])
+        cur_geom = STGeometry(metadata.projection, shapely.box(*crop_bounds), None)
+        start_offset = (crop_bounds[0] - bounds[0], crop_bounds[1] - bounds[1])
+        end_offset = (crop_bounds[2] - bounds[0], crop_bounds[3] - bounds[1])
 
         cur_raw_inputs = self._crop_input_dict(
             raw_inputs, start_offset, end_offset, cur_geom
@@ -501,9 +504,9 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
         # Adjust the metadata as well.
         cur_metadata = replace(
             metadata,
-            patch_bounds=patch_bounds,
-            patch_idx=patch_idx,
-            num_patches_in_window=num_patches,
+            crop_bounds=crop_bounds,
+            crop_idx=crop_idx,
+            num_crops_in_window=num_crops,
         )
 
         # Now we can compute input and target dicts via the task.
rslearn/train/data_module.py
CHANGED

@@ -15,12 +15,13 @@ from rslearn.dataset import Dataset
 from rslearn.log_utils import get_logger
 from rslearn.train.tasks import Task
 
-from .all_patches_dataset import (
-    InMemoryAllPatchesDataset,
-    IterableAllPatchesDataset,
+from .all_crops_dataset import (
+    InMemoryAllCropsDataset,
+    IterableAllCropsDataset,
 )
 from .dataset import (
     DataInput,
+    IndexMode,
     ModelDataset,
     MultiDataset,
     RetryDataset,
@@ -68,7 +69,8 @@ class RslearnDataModule(L.LightningDataModule):
         predict_config: SplitConfig = SplitConfig(),
         name: str | None = None,
         retries: int = 0,
-        use_in_memory_all_patches_dataset: bool = False,
+        use_in_memory_all_crops_dataset: bool = False,
+        index_mode: IndexMode = IndexMode.OFF,
     ) -> None:
         """Initialize a new RslearnDataModule.
 
@@ -90,8 +92,9 @@ class RslearnDataModule(L.LightningDataModule):
             predict_config: split config for predict split
             name: name of the dataset
             retries: number of retries to attempt for getitem calls
-            use_in_memory_all_patches_dataset: whether to use InMemoryAllPatchesDataset
-                instead of IterableAllPatchesDataset if load_all_patches is set to true.
+            use_in_memory_all_crops_dataset: whether to use InMemoryAllCropsDataset
+                instead of IterableAllCropsDataset if load_all_crops is set to true.
+            index_mode: controls dataset index caching behavior (default: IndexMode.OFF)
         """
         super().__init__()
         self.inputs = inputs
@@ -102,7 +105,8 @@ class RslearnDataModule(L.LightningDataModule):
         self.init_workers = init_workers if init_workers > 0 else self.num_workers
         self.name = name
         self.retries = retries
-        self.use_in_memory_all_patches_dataset = use_in_memory_all_patches_dataset
+        self.use_in_memory_all_crops_dataset = use_in_memory_all_crops_dataset
+        self.index_mode = index_mode
         self.split_configs = {
             "train": default_config.update(train_config),
             "val": default_config.update(val_config),
@@ -111,15 +115,15 @@ class RslearnDataModule(L.LightningDataModule):
         }
 
     def setup(
-        self, stage: str, use_in_memory_all_patches_dataset: bool | None = None
+        self, stage: str, use_in_memory_all_crops_dataset: bool | None = None
     ) -> None:
         """Set up datasets and samplers.
 
         Args:
             stage: Either 'fit', 'validate', 'test', or 'predict'.
-            use_in_memory_all_patches_dataset: whether to use InMemoryAllPatchesDataset
-                instead of IterableAllPatchesDataset if load_all_patches is set to true.
-                If None, uses the value of self.use_in_memory_all_patches_dataset.
+            use_in_memory_all_crops_dataset: whether to use InMemoryAllCropsDataset
+                instead of IterableAllCropsDataset if load_all_crops is set to true.
+                If None, uses the value of self.use_in_memory_all_crops_dataset.
         """
         stage_to_splits = {
             "fit": ["train", "val"],
@@ -138,36 +142,37 @@ class RslearnDataModule(L.LightningDataModule):
                 workers=self.init_workers,
                 name=self.name,
                 fix_patch_pick=(split != "train"),
+                index_mode=self.index_mode,
             )
             logger.info(f"got {len(dataset)} examples in split {split}")
-            if split_config.get_load_all_patches():
-                if use_in_memory_all_patches_dataset is None:
-                    use_in_memory_all_patches_dataset = (
-                        self.use_in_memory_all_patches_dataset
+            if split_config.get_load_all_crops():
+                if use_in_memory_all_crops_dataset is None:
+                    use_in_memory_all_crops_dataset = (
+                        self.use_in_memory_all_crops_dataset
                     )
                 logger.info(
-                    f"using AllPatchesDataset (in_memory={use_in_memory_all_patches_dataset})"
+                    f"using AllCropsDataset (in_memory={use_in_memory_all_crops_dataset})"
                 )
-                patch_size = split_config.get_patch_size()
-                if patch_size is None:
+                crop_size = split_config.get_crop_size()
+                if crop_size is None:
                     raise ValueError(
-                        "patch_size is not set but must be set if load_all_patches is set"
+                        "crop_size is not set but must be set if load_all_crops is set"
                     )
 
-                all_patches_cls = IterableAllPatchesDataset
+                all_crops_cls = IterableAllCropsDataset
                 kwargs = dict(
                     dataset=dataset,
-                    patch_size=patch_size,
-                    overlap_ratio=split_config.get_overlap_ratio(),
+                    crop_size=crop_size,
+                    overlap_pixels=split_config.get_overlap_pixels(),
                     rank=self.trainer.global_rank if self.trainer else 0,
                     world_size=self.trainer.world_size if self.trainer else 1,
                 )
-                if use_in_memory_all_patches_dataset:
+                if use_in_memory_all_crops_dataset:
                     kwargs.pop("rank")
                     kwargs.pop("world_size")
-                    all_patches_cls = InMemoryAllPatchesDataset  # type: ignore
+                    all_crops_cls = InMemoryAllCropsDataset  # type: ignore
 
-                dataset = all_patches_cls(**kwargs)  # type: ignore
+                dataset = all_crops_cls(**kwargs)  # type: ignore
 
             if self.retries > 0:
                 dataset = RetryDataset(dataset, retries=self.retries)
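Note the semantic change in this hunk: 0.0.25 configured the overlap as a ratio of the patch size (overlap_ratio), while 0.0.27 takes an absolute pixel count (overlap_pixels). A sketch of an equivalent configuration under both schemes, with hypothetical values:

```python
patch_size = (256, 256)

# 0.0.25: overlap derived from a ratio, so it scaled with the patch size.
overlap_ratio = 0.125
old_overlap_size = (
    round(patch_size[0] * overlap_ratio),
    round(patch_size[1] * overlap_ratio),
)

# 0.0.27: overlap given directly in pixels, independent of the crop size.
overlap_pixels = 32
new_overlap_size = (overlap_pixels, overlap_pixels)

assert old_overlap_size == new_overlap_size == (32, 32)
```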
@@ -204,7 +209,7 @@ class RslearnDataModule(L.LightningDataModule):
         # If the number of windows is 0, then we can set positive number of workers
         # since they won't yield anything anyway.
         num_workers = self.num_workers
-        if split_config.load_all_patches and len(dataset.get_dataset_examples()) > 0:
+        if split_config.load_all_crops and len(dataset.get_dataset_examples()) > 0:
             num_workers = min(num_workers, len(dataset.get_dataset_examples()))
 
         kwargs: dict[str, Any] = dict(
@@ -352,7 +357,7 @@ class MultiDatasetDataModule(L.LightningDataModule):
             stage: The stage to set up ('fit', 'validate', 'test', 'predict')
         """
         for name, data_module in self.data_modules.items():
-            data_module.setup(stage, use_in_memory_all_patches_dataset=True)  # type: ignore
+            data_module.setup(stage, use_in_memory_all_crops_dataset=True)  # type: ignore
             data_module.set_name(name)
 
     def _get_dataloader(self, split: str) -> DataLoader[dict[str, torch.Tensor]]: