rslearn 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/dataset/handler_summaries.py +130 -0
- rslearn/dataset/manage.py +157 -22
- rslearn/main.py +60 -8
- rslearn/models/anysat.py +207 -0
- rslearn/models/clay/clay.py +219 -0
- rslearn/models/clay/configs/metadata.yaml +295 -0
- rslearn/models/copernicusfm.py +37 -25
- rslearn/models/dinov3.py +165 -0
- rslearn/models/galileo/__init__.py +5 -0
- rslearn/models/galileo/galileo.py +517 -0
- rslearn/models/galileo/single_file_galileo.py +1672 -0
- rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
- rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
- rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
- rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
- rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
- rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
- rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
- rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
- rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
- rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
- rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
- rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
- rslearn/models/presto/presto.py +10 -7
- rslearn/models/prithvi.py +1122 -0
- rslearn/models/resize_features.py +45 -0
- rslearn/models/simple_time_series.py +65 -10
- rslearn/models/unet.py +17 -11
- rslearn/models/upsample.py +2 -2
- rslearn/tile_stores/default.py +31 -6
- rslearn/train/transforms/normalize.py +34 -5
- rslearn/train/transforms/select_bands.py +67 -0
- rslearn/train/transforms/sentinel1.py +60 -0
- rslearn/utils/geometry.py +61 -1
- rslearn/utils/raster_format.py +7 -1
- rslearn/utils/vector_format.py +13 -10
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/METADATA +144 -15
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/RECORD +42 -18
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/WHEEL +0 -0
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.6.dist-info → rslearn-0.0.8.dist-info}/top_level.txt +0 -0
rslearn/dataset/handler_summaries.py
ADDED

@@ -0,0 +1,130 @@
+"""This module contains dataclasses for summarizing the results of dataset operations.
+
+They can be used by callers to emit telemetry / logs, or discarded.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class LayerPrepareSummary:
+    """Results for preparing a single layer."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    windows_prepared: int
+    windows_skipped: int
+    get_items_attempts: int
+
+
+@dataclass
+class PrepareDatasetWindowsSummary:
+    """Results from prepare_dataset_windows operation for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+
+    # Per-layer summaries
+    layer_summaries: list[LayerPrepareSummary]
+
+
+@dataclass
+class IngestCounts:
+    """Known ingestion counts."""
+
+    items_ingested: int
+    geometries_ingested: int
+
+
+@dataclass
+class UnknownIngestCounts:
+    """Indicates ingestion counts are unknown due to partial failure."""
+
+    items_attempted: int
+    geometries_attempted: int
+
+
+@dataclass
+class LayerIngestSummary:
+    """Results for ingesting a single layer."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts - either known or unknown
+    ingest_counts: IngestCounts | UnknownIngestCounts
+    ingest_attempts: int
+
+
+@dataclass
+class IngestDatasetJobsSummary:
+    """Results from ingesting a set of jobs; for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    num_jobs: int
+
+    # Per-layer summaries
+    layer_summaries: list[LayerIngestSummary]
+
+
+@dataclass
+class MaterializeWindowLayerSummary:
+    """Results for materializing a single window layer."""
+
+    skipped: bool
+    materialize_attempts: int
+
+
+@dataclass
+class MaterializeWindowLayersSummary:
+    """Results for materialize a given layer for all windows in a materialize call."""
+
+    # Identity
+    layer_name: str
+    data_source_name: str
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+    num_windows_materialized: int
+    materialize_attempts: int
+
+
+@dataclass
+class MaterializeDatasetWindowsSummary:
+    """Results from materialize_dataset_windows operation for telemetry purposes."""
+
+    # Timing
+    duration_seconds: float
+
+    # Counts
+    total_windows_requested: int
+
+    # Per-layer summaries
+    layer_summaries: list[MaterializeWindowLayersSummary]
+
+
+@dataclass
+class ErrorOutcome:
+    """TBD what goes in here, if anything."""
+
+    # Timing
+    duration_seconds: float
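
These summary objects are plain dataclasses with no behavior, so callers can log them, serialize them, or discard them. As a minimal illustration (not part of the package), a caller that receives a PrepareDatasetWindowsSummary from prepare_dataset_windows (see the manage.py changes below) might flatten it into log lines like this; the log_prepare_summary helper is hypothetical:

from rslearn.dataset.handler_summaries import PrepareDatasetWindowsSummary


def log_prepare_summary(summary: PrepareDatasetWindowsSummary) -> None:
    # Hypothetical helper: one overall line plus one line per layer.
    print(
        f"prepare took {summary.duration_seconds:.1f}s for "
        f"{summary.total_windows_requested} windows"
    )
    for layer in summary.layer_summaries:
        print(
            f"  layer={layer.layer_name} source={layer.data_source_name} "
            f"prepared={layer.windows_prepared} skipped={layer.windows_skipped} "
            f"get_items_attempts={layer.get_items_attempts}"
        )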
rslearn/dataset/manage.py
CHANGED

@@ -13,6 +13,13 @@ from rslearn.config import (
    RasterLayerConfig,
)
from rslearn.data_sources import DataSource, Item
+from rslearn.dataset.handler_summaries import (
+    LayerPrepareSummary,
+    MaterializeDatasetWindowsSummary,
+    MaterializeWindowLayersSummary,
+    MaterializeWindowLayerSummary,
+    PrepareDatasetWindowsSummary,
+)
from rslearn.log_utils import get_logger
from rslearn.tile_stores import TileStore, get_tile_store_with_layer

@@ -23,7 +30,24 @@ from .window import Window, WindowLayerData
logger = get_logger(__name__)


-def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
+class AttemptsCounter:
+    """A simple counter for tracking attempts (including initial attempt and retries)."""
+
+    def __init__(self) -> None:
+        """Initialize counter with value 0."""
+        self.value = 0
+
+    def increment(self) -> None:
+        """Increment the counter by 1."""
+        self.value += 1
+
+
+def retry(
+    fn: Callable,
+    retry_max_attempts: int,
+    retry_backoff: timedelta,
+    attempts_counter: AttemptsCounter | None = None,
+) -> Any:
    """Retry the function multiple times in case of error.

    The function is retried until either the attempts are exhausted, or the function

@@ -37,8 +61,11 @@ def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
            retries. The actual time is (retry_backoff * attempts) * r, where r is a
            random number between 1 and 2, and attempts is the number of attempts tried
            so far.
+        attempts_counter: an optional counter to increment for each attempt
    """
    for attempt_idx in range(retry_max_attempts):
+        if attempts_counter:
+            attempts_counter.increment()
        try:
            return fn()
        except Exception as e:

@@ -47,6 +74,8 @@ def retry(fn: Callable, retry_max_attempts: int, retry_backoff: timedelta) -> Any:
            time.sleep(sleep_base_seconds * (1 + random.random()))

    # Last attempt. This time we don't catch the exception.
+    if attempts_counter:
+        attempts_counter.increment()
    return fn()

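The counter is passed into retry by the prepare, ingest, and materialize paths below so that the summaries can report how many calls (first try plus retries) each step needed. A self-contained sketch of that pattern, consistent with the hunks above; the flaky_fetch function is invented for illustration:

from datetime import timedelta

from rslearn.dataset.manage import AttemptsCounter, retry

state = {"calls": 0}


def flaky_fetch() -> str:
    # Invented function: fails once, then succeeds.
    state["calls"] += 1
    if state["calls"] < 2:
        raise RuntimeError("transient error")
    return "ok"


counter = AttemptsCounter()
result = retry(
    fn=flaky_fetch,
    retry_max_attempts=3,
    retry_backoff=timedelta(seconds=1),  # waits roughly 1-2 seconds after the failed attempt
    attempts_counter=counter,
)
# result == "ok" and counter.value == 2: one failed attempt plus one success.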

@@ -56,7 +85,7 @@ def prepare_dataset_windows(
    force: bool = False,
    retry_max_attempts: int = 0,
    retry_backoff: timedelta = timedelta(minutes=1),
-) ->
+) -> PrepareDatasetWindowsSummary:
    """Prepare windows in a dataset.

    Preparing a window involves looking up items corresponding to the window in each of

@@ -70,10 +99,28 @@ def prepare_dataset_windows(
        retry_max_attempts: set greater than zero to retry for this many attempts in
            case of error.
        retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the prepare operation, fit for telemetry purposes
    """
+    start_time = time.monotonic()
+    layer_summaries: list[LayerPrepareSummary] = []
+
    # Iterate over retrieved layers, and prepare each one.
    for layer_name, layer_cfg in dataset.layers.items():
+        layer_start_time = time.monotonic()
+
        if not layer_cfg.data_source:
+            layer_summaries.append(
+                LayerPrepareSummary(
+                    layer_name=layer_name,
+                    data_source_name="N/A",
+                    duration_seconds=time.monotonic() - layer_start_time,
+                    windows_prepared=0,
+                    windows_skipped=len(windows),
+                    get_items_attempts=0,
+                )
+            )
            continue
        data_source_cfg = layer_cfg.data_source

@@ -85,7 +132,18 @@ def prepare_dataset_windows(
                continue
            needed_windows.append(window)
        logger.info(f"Preparing {len(needed_windows)} windows for layer {layer_name}")
+
        if len(needed_windows) == 0:
+            layer_summaries.append(
+                LayerPrepareSummary(
+                    layer_name=layer_name,
+                    data_source_name=data_source_cfg.name,
+                    duration_seconds=time.monotonic() - layer_start_time,
+                    windows_prepared=0,
+                    windows_skipped=len(windows),
+                    get_items_attempts=0,
+                )
+            )
            continue

        # Create data source after checking for at least one window so it can be fast

@@ -115,10 +173,12 @@ def prepare_dataset_windows(

            geometries.append(geometry)

+        attempts_counter = AttemptsCounter()
        results = retry(
            fn=lambda: data_source.get_items(geometries, data_source_cfg.query_config),
            retry_max_attempts=retry_max_attempts,
            retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )

        for window, result in zip(needed_windows, results):

@@ -131,6 +191,25 @@ def prepare_dataset_windows(
            )
            window.save_layer_datas(layer_datas)

+        layer_summaries.append(
+            LayerPrepareSummary(
+                layer_name=layer_name,
+                data_source_name=data_source_cfg.name,
+                duration_seconds=time.monotonic() - layer_start_time,
+                windows_prepared=len(needed_windows),  # we assume all have succeeded
+                windows_skipped=len(windows) - len(needed_windows),
+                get_items_attempts=attempts_counter.value,
+            )
+        )
+
+    summary = PrepareDatasetWindowsSummary(
+        duration_seconds=time.monotonic() - start_time,
+        total_windows_requested=len(windows),
+        layer_summaries=layer_summaries,
+    )
+
+    return summary
+

def ingest_dataset_windows(
    dataset: Dataset,

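For callers outside the CLI handlers, the new return value is purely additive: code that ignored the old return keeps working, and code that wants telemetry can capture the summary. A hedged sketch of such a caller; the Dataset constructor call mirrors main.py in this diff, while load_windows is an assumed way to enumerate the dataset's windows and may be named differently in your version:

from upath import UPath

from rslearn.dataset import Dataset
from rslearn.dataset.manage import prepare_dataset_windows

dataset = Dataset(UPath("/data/my_dataset"))
windows = dataset.load_windows()  # assumed accessor for the dataset's windows
summary = prepare_dataset_windows(dataset, windows)
for layer in summary.layer_summaries:
    print(
        f"{layer.layer_name}: prepared={layer.windows_prepared} "
        f"skipped={layer.windows_skipped} attempts={layer.get_items_attempts}"
    )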
@@ -251,7 +330,7 @@ def materialize_window(
    layer_cfg: LayerConfig,
    retry_max_attempts: int = 0,
    retry_backoff: timedelta = timedelta(minutes=1),
-) ->
+) -> MaterializeWindowLayerSummary:
    """Materialize a window.

    Args:

@@ -264,10 +343,16 @@ def materialize_window(
        retry_max_attempts: set greater than zero to retry for this many attempts in
            case of error.
        retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the materialize operation, fit for telemetry purposes
    """
    # Check if layer is materialized already.
    if window.is_layer_completed(layer_name):
-        return
+        return MaterializeWindowLayerSummary(
+            skipped=True,
+            materialize_attempts=0,
+        )

    layer_datas = window.load_layer_datas()
    if layer_name not in layer_datas:

@@ -276,7 +361,11 @@ def materialize_window(
            layer_name,
            window.name,
        )
-        return
+        return MaterializeWindowLayerSummary(
+            skipped=True,
+            materialize_attempts=0,
+        )
+
    layer_data = layer_datas[layer_name]
    item_groups = []
    for serialized_group in layer_data.serialized_item_groups:

@@ -288,6 +377,8 @@ def materialize_window(

    if layer_cfg.data_source is None:
        raise ValueError("data_source is required")
+
+    attempts_counter = AttemptsCounter()
    if layer_cfg.data_source.ingest:
        if not is_window_ingested(dataset, window, check_layer_name=layer_name):
            logger.info(

@@ -295,9 +386,12 @@ def materialize_window(
                layer_name,
                window.name,
            )
-            return
+            return MaterializeWindowLayerSummary(
+                skipped=True,
+                materialize_attempts=0,
+            )

-
+        logger.info(
            f"Materializing {len(item_groups)} item groups in layer {layer_name} from tile store"
        )

@@ -316,11 +410,12 @@ def materialize_window(
            ),
            retry_max_attempts=retry_max_attempts,
            retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )

    else:
        # This window is meant to be materialized directly from the data source.
-
+        logger.info(
            f"Materializing {len(item_groups)} item groups in layer {layer_name} via data source"
        )
        retry(

@@ -329,15 +424,21 @@ def materialize_window(
            ),
            retry_max_attempts=retry_max_attempts,
            retry_backoff=retry_backoff,
+            attempts_counter=attempts_counter,
        )

+    return MaterializeWindowLayerSummary(
+        skipped=False,
+        materialize_attempts=attempts_counter.value,
+    )
+

def materialize_dataset_windows(
    dataset: Dataset,
    windows: list[Window],
    retry_max_attempts: int = 0,
    retry_backoff: timedelta = timedelta(minutes=1),
-) ->
+) -> MaterializeDatasetWindowsSummary:
    """Materialize items for retrieved layers in a dataset.

    The portions of items corresponding to dataset windows are extracted from the tile

@@ -349,24 +450,58 @@ def materialize_dataset_windows(
        retry_max_attempts: set greater than zero to retry for this many attempts in
            case of error.
        retry_backoff: how long to wait before retrying (see retry).
+
+    Returns:
+        a summary of the materialize operation, fit for telemetry purposes
    """
+    start_time = time.monotonic()
+
+    layer_summaries: list[MaterializeWindowLayersSummary] = []
+
    tile_store = dataset.get_tile_store()
    for layer_name, layer_cfg in dataset.layers.items():
+        layer_start_time = time.monotonic()
+
+        total_materialize_attempts = 0
+        total_skipped = 0
+        data_source_name = "N/A"
+
        if not layer_cfg.data_source:
-
+            total_skipped = len(windows)
+        else:
+            data_source_name = layer_cfg.data_source.name
+            data_source = rslearn.data_sources.data_source_from_config(
+                layer_cfg, dataset.path
+            )

-
-
-
+            for window in windows:
+                window_summary = materialize_window(
+                    window=window,
+                    dataset=dataset,
+                    data_source=data_source,
+                    tile_store=tile_store,
+                    layer_name=layer_name,
+                    layer_cfg=layer_cfg,
+                    retry_max_attempts=retry_max_attempts,
+                    retry_backoff=retry_backoff,
+                )
+                total_materialize_attempts += window_summary.materialize_attempts
+                if window_summary.skipped:
+                    total_skipped += 1

-
-
-                window=window,
-                dataset=dataset,
-                data_source=data_source,
-                tile_store=tile_store,
+        layer_summaries.append(
+            MaterializeWindowLayersSummary(
                layer_name=layer_name,
-
-
-
+                data_source_name=data_source_name,
+                duration_seconds=time.monotonic() - layer_start_time,
+                total_windows_requested=len(windows),
+                num_windows_materialized=len(windows) - total_skipped,
+                materialize_attempts=total_materialize_attempts,
            )
+        )
+
+    return MaterializeDatasetWindowsSummary(
+        duration_seconds=time.monotonic() - start_time,
+        total_windows_requested=len(windows),
+        layer_summaries=layer_summaries,
+    )
rslearn/main.py
CHANGED

@@ -4,6 +4,7 @@ import argparse
import multiprocessing
import random
import sys
+import time
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from typing import Any, TypeVar

@@ -19,8 +20,18 @@ from rslearn.const import WGS84_EPSG
from rslearn.data_sources import Item, data_source_from_config
from rslearn.dataset import Dataset, Window, WindowLayerData
from rslearn.dataset.add_windows import add_windows_from_box, add_windows_from_file
+from rslearn.dataset.handler_summaries import (
+    ErrorOutcome,
+    IngestCounts,
+    IngestDatasetJobsSummary,
+    LayerIngestSummary,
+    MaterializeDatasetWindowsSummary,
+    PrepareDatasetWindowsSummary,
+    UnknownIngestCounts,
+)
from rslearn.dataset.index import DatasetIndex
from rslearn.dataset.manage import (
+    AttemptsCounter,
    materialize_dataset_windows,
    prepare_dataset_windows,
    retry,

@@ -287,7 +298,7 @@ def add_apply_on_windows_args(parser: argparse.ArgumentParser) -> None:


def apply_on_windows(
-    f: Callable[[list[Window]],
+    f: Callable[[list[Window]], Any],
    dataset: Dataset,
    group: str | list[str] | None = None,
    names: list[str] | None = None,

@@ -367,7 +378,7 @@ def apply_on_windows(
        p.close()


-def apply_on_windows_args(f: Callable[...,
+def apply_on_windows_args(f: Callable[..., Any], args: argparse.Namespace) -> None:
    """Call apply_on_windows with arguments passed via command-line interface."""
    dataset = Dataset(UPath(args.root), args.disabled_layers)
    apply_on_windows(

@@ -413,12 +424,12 @@ class PrepareHandler:
        """
        self.dataset = dataset

-    def __call__(self, windows: list[Window]) ->
+    def __call__(self, windows: list[Window]) -> PrepareDatasetWindowsSummary:
        """Prepares the windows from apply_on_windows."""
        logger.info(f"Running prepare on {len(windows)} windows")
        if self.dataset is None:
            raise ValueError("dataset not set")
-        prepare_dataset_windows(
+        return prepare_dataset_windows(
            self.dataset,
            windows,
            self.force,

@@ -502,14 +513,20 @@ class IngestHandler:

    def __call__(
        self, jobs: list[tuple[str, LayerConfig, Item, list[STGeometry]]]
-    ) ->
+    ) -> IngestDatasetJobsSummary:
        """Ingest the specified items.

        The items are computed from list of windows via IngestHandler.get_jobs.

        Args:
-            jobs: list of (layer_name, item, geometries) tuples to ingest.
+            jobs: list of (layer_name, layer_cfg, item, geometries) tuples to ingest.
+
+        Returns:
+            summary of the ingest jobs operation fit for telemetry purposes.
        """
+        start_time = time.monotonic()
+        layer_summaries: list[LayerIngestSummary] = []
+
        logger.info(f"Running ingest for {len(jobs)} jobs")
        import gc


@@ -533,6 +550,8 @@ class IngestHandler:
            layer_cfg = self.dataset.layers[layer_name]
            data_source = data_source_from_config(layer_cfg, self.dataset.path)

+            attempts_counter = AttemptsCounter()
+            ingest_counts: IngestCounts | UnknownIngestCounts
            try:
                retry(
                    lambda: data_source.ingest(

@@ -544,18 +563,47 @@ class IngestHandler:
                    ),
                    retry_max_attempts=self.retry_max_attempts,
                    retry_backoff=self.retry_backoff,
+                    attempts_counter=attempts_counter,
+                )
+                ingest_counts = IngestCounts(
+                    items_ingested=len(items_and_geometries),
+                    geometries_ingested=sum(
+                        len(geometries) for _, geometries in items_and_geometries
+                    ),
                )
            except Exception as e:
                if not self.ignore_errors:
                    raise

+                ingest_counts = UnknownIngestCounts(
+                    items_attempted=len(items_and_geometries),
+                    geometries_attempted=sum(
+                        len(geometries) for _, geometries in items_and_geometries
+                    ),
+                )
                logger.error(
                    "warning: got error while ingesting "
                    + f"{len(items_and_geometries)} items: {e}"
                )

+            layer_summaries.append(
+                LayerIngestSummary(
+                    layer_name=layer_name,
+                    data_source_name=getattr(layer_cfg.data_source, "name", "N/A"),
+                    duration_seconds=time.monotonic() - start_time,
+                    ingest_counts=ingest_counts,
+                    ingest_attempts=attempts_counter.value,
+                )
+            )
+
            gc.collect()

+        return IngestDatasetJobsSummary(
+            duration_seconds=time.monotonic() - start_time,
+            num_jobs=len(jobs),
+            layer_summaries=layer_summaries,
+        )
+
    def _load_layer_data_for_windows(
        self, windows: list[Window], workers: int
    ) -> list[tuple[Window, dict[str, WindowLayerData]]]:

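Since LayerIngestSummary.ingest_counts is a union, downstream code that reads these summaries must branch on the concrete type to tell a clean ingest from one where ignore_errors swallowed a failure. A minimal sketch of that branching (the describe_ingest helper is hypothetical):

from rslearn.dataset.handler_summaries import (
    IngestCounts,
    LayerIngestSummary,
    UnknownIngestCounts,
)


def describe_ingest(layer: LayerIngestSummary) -> str:
    # Branch on the concrete counts type; UnknownIngestCounts signals a partial failure.
    counts = layer.ingest_counts
    if isinstance(counts, IngestCounts):
        return (
            f"{layer.layer_name}: ingested {counts.items_ingested} items "
            f"({counts.geometries_ingested} geometries) in {layer.ingest_attempts} attempt(s)"
        )
    assert isinstance(counts, UnknownIngestCounts)
    return (
        f"{layer.layer_name}: attempted {counts.items_attempted} items "
        f"({counts.geometries_attempted} geometries); final counts unknown"
    )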
@@ -686,13 +734,16 @@ class MaterializeHandler:
        """
        self.dataset = dataset

-    def __call__(
+    def __call__(
+        self, windows: list[Window]
+    ) -> MaterializeDatasetWindowsSummary | ErrorOutcome:
        """Materializes the windows from apply_on_windows."""
        logger.info(f"Running Materialize with {len(windows)} windows")
+        start_time = time.monotonic()
        if self.dataset is None:
            raise ValueError("dataset not set")
        try:
-            materialize_dataset_windows(
+            return materialize_dataset_windows(
                self.dataset,
                windows,
                retry_max_attempts=self.retry_max_attempts,

@@ -703,6 +754,7 @@ class MaterializeHandler:
                logger.error(f"Error materializing windows: {e}")
                raise
            logger.warning(f"Ignoring error while materializing windows: {e}")
+            return ErrorOutcome(duration_seconds=time.monotonic() - start_time)


@register_handler("dataset", "materialize")