PyPI - olmoearth-pretrain-minimal - Versions diffs - 0.0.1__py3-none-any.whl - Mend

olmoearth-pretrain-minimal 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/constants.py ADDED Viewed

@@ -0,0 +1,519 @@
+"""Constants shared across the OlmoEarth Pretrain package.
+Warning: this is only developed for raster data currently.
+"""
+from dataclasses import dataclass
+from enum import Enum
+# The highest resolution that we are working at.
+# Everything else is a factor (which is a power of 2) coarser than this resolution.
+BASE_RESOLUTION = 0.625
+# The default image tile size.
+# Some images may be smaller if they are stored at a coarser resolution compared to the
+# resolution that the grid is based on.
+IMAGE_TILE_SIZE = 256
+PROJECTION_CRS = "EPSG:4326"
+# Default missing value for raster data.
+MISSING_VALUE = -99999
+# Default maximum sequence length.
+MAX_SEQUENCE_LENGTH = 12
+# Resolution of the input data in meters
+BASE_GSD = 10
+# Default nodata value for Sentinel-1 data.
+SENTINEL1_NODATA = -32768
+# Number of timesteps for YEAR data.
+YEAR_NUM_TIMESTEPS = 12
+def get_resolution(resolution_factor: int) -> float | int:
+    """Compute the resolution.
+    If it is an integer, then we cast it to int so that it works with the raw OlmoEarth Pretrain
+    dataset, where some files are named based on the integer. We may want to change
+    this in the future to avoid the extra code here.
+    """
+    resolution = BASE_RESOLUTION * resolution_factor
+    if float(int(resolution)) == resolution:
+        return int(resolution)
+    return resolution
+@dataclass(frozen=True)
+class BandSet:
+    """A group of bands that is stored at the same resolution.
+    Many modalities only have one band set, but some have different bands at different
+    resolutions.
+    """
+    # List of band names.
+    bands: list[str]
+    # Resolution is BASE_RESOLUTION * resolution_factor.
+    # If resolution == 0, this means the data
+    # does not vary in space (e.g. latlons)
+    resolution_factor: int
+    def __hash__(self) -> int:
+        """Hash this BandSet."""
+        return hash((tuple(self.bands), self.resolution_factor))
+    def get_resolution(self) -> float:
+        """Compute the resolution."""
+        return get_resolution(self.resolution_factor)
+    def get_expected_image_size(self, modality_resolution_factor: int) -> int:
+        """Get the expected size of images containing these bands.
+        Args:
+            modality_resolution_factor: the resolution factor of the modality.
+        Returns:
+            the expected image size.
+        """
+        return IMAGE_TILE_SIZE // (self.resolution_factor // modality_resolution_factor)
+class TimeSpan(str, Enum):
+    """Enum to distinguish data that is valid for different time ranges."""
+    # Only one data point (not time series).
+    STATIC = "static"
+    # Monthly over one year.
+    YEAR = "year"
+    # Every data point in a two-week period.
+    TWO_WEEK = "two_week"
+    def get_suffix(self) -> str:
+        """Returns the suffix used for this timespan in raw OlmoEarth Pretrain dataset."""
+        if self == TimeSpan.STATIC:
+            return ""
+        if self == TimeSpan.YEAR:
+            return "_monthly"
+        if self == TimeSpan.TWO_WEEK:
+            return "_freq"
+        raise ValueError("invalid TimeSpan")
+@dataclass(frozen=True)
+class ModalitySpec:
+    """Modality specification.
+    Args:
+        name: the name of the modality.
+        tile_resolution_factor: the factor of how much more ground area is covered by the tile compared with a tile
+                        of IMAGE_TILE_SIZE x IMAGE_TILE_SIZE pixels at the base resolution.
+        band_sets: the band sets of the modality, ie the units of tokenization.
+        is_multitemporal: whether the modality is multitemporal.
+        ignore_when_parsing: whether to ignore the modality when parsing the data form the csv file.
+        image_tile_size_factor: the factor of how much bigger the dimensions of the image tile are compared with the base tile size.
+    """
+    name: str
+    tile_resolution_factor: int
+    band_sets: list[BandSet]
+    is_multitemporal: bool
+    ignore_when_parsing: bool  # If true this modality is not parsed from the csv file and not loaded form a file
+    image_tile_size_factor: int = 1
+    def __hash__(self) -> int:
+        """Hash this Modality."""
+        return hash(self.name)
+    def get_tile_resolution(self) -> float:
+        """Compute the tile resolution."""
+        return get_resolution(self.tile_resolution_factor)
+    def bandsets_as_indices(self) -> list[list[int]]:
+        """Return band sets as indices."""
+        indices = []
+        offset = 0
+        for band_set in self.band_sets:
+            num_bands = len(band_set.bands)
+            indices.append(list(range(offset, offset + num_bands)))
+            offset += num_bands
+        return indices
+    @property
+    def band_order(self) -> list[str]:
+        """Get all bands."""
+        return sum((list(band_set.bands) for band_set in self.band_sets), [])
+    @property
+    def num_band_sets(self) -> int:
+        """Get the number of band sets."""
+        return len(self.band_sets)
+    @property
+    def num_bands(self) -> int:
+        """Get the number of channels.
+        The number of channels is the sum of the number of bands in all the band sets.
+        """
+        return sum(len(band_set.bands) for band_set in self.band_sets)
+    def get_expected_tile_size(self) -> int:
+        """Get the expected size of the tile."""
+        if self.image_tile_size_factor < 0:
+            return IMAGE_TILE_SIZE // abs(self.image_tile_size_factor)
+        else:
+            return IMAGE_TILE_SIZE * self.image_tile_size_factor
+    @property
+    def is_spatial(self) -> bool:
+        """Does the modality have spatial data."""
+        # Tile size must be greater than 1 to have spatial varying data.
+        return self.get_tile_resolution() > 0 and self.get_expected_tile_size() > 1
+    @property
+    def is_spacetime_varying(self) -> bool:
+        """Does the modality vary in space and time."""
+        return self.is_spatial and self.is_multitemporal
+    @property
+    def is_space_only_varying(self) -> bool:
+        """Does the modality vary in space and not time."""
+        return self.is_spatial and not self.is_multitemporal
+    @property
+    def is_time_only_varying(self) -> bool:
+        """Does the modality vary in time and not space."""
+        return not self.is_spatial and self.is_multitemporal
+    @property
+    def is_static_in_space_and_time(self) -> bool:
+        """Does the modality vary in neither space or space."""
+        return not self.is_spatial and not self.is_multitemporal
+class Modality:
+    """Enum-like access to ModalitySpecs."""
+    NAIP = ModalitySpec(
+        name="naip",
+        tile_resolution_factor=1,
+        band_sets=[BandSet(["R", "G", "B", "IR"], 1)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    # NAIP_10 is the NAIP data that covers the same extent as a IMAGE_TILE_SIZE x IMAGE_TILE_SIZE tile
+    # at 10 m/pixel resolution but is still stored at NAIP resolution.
+    NAIP_10 = ModalitySpec(
+        name="naip_10",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["R", "G", "B", "IR"], 1)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+        # Currently this is set to 4x (2.5 m/pixel) so that it is more feasible to
+        # train with NAIP_10. This way we end up with 512x512 NAIP images in the
+        # 128x128 H5 files instead of 2048x2048, which slows down data loading.
+        image_tile_size_factor=4,
+    )
+    SENTINEL1 = ModalitySpec(
+        name="sentinel1",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["vv", "vh"], 16)],
+        is_multitemporal=True,
+        ignore_when_parsing=False,
+    )
+    SENTINEL2 = ModalitySpec(
+        name="sentinel2",
+        tile_resolution_factor=16,
+        band_sets=[
+            # 10 m/pixel bands.
+            BandSet(["B02", "B03", "B04", "B08"], 16),
+            # 20 m/pixel bands.
+            BandSet(["B05", "B06", "B07", "B8A", "B11", "B12"], 32),
+            # 60 m/pixel bands that we store at 40 m/pixel.
+            BandSet(["B01", "B09", "B10"], 64),
+        ],
+        is_multitemporal=True,
+        ignore_when_parsing=False,
+    )
+    SENTINEL2_L2A = ModalitySpec(
+        name="sentinel2_l2a",
+        tile_resolution_factor=16,
+        band_sets=[
+            # 10 m/pixel bands.
+            BandSet(["B02", "B03", "B04", "B08"], 16),
+            # 20 m/pixel bands.
+            BandSet(["B05", "B06", "B07", "B8A", "B11", "B12"], 32),
+            # 60 m/pixel bands that we store at 40 m/pixel.
+            BandSet(["B01", "B09"], 64),
+        ],
+        is_multitemporal=True,
+        ignore_when_parsing=False,
+    )
+    LANDSAT = ModalitySpec(
+        name="landsat",
+        tile_resolution_factor=16,
+        band_sets=[
+            # 15 m/pixel bands that we store at 10 m/pixel.
+            BandSet(["B8"], 16),
+            # 30 m/pixel bands that we store at 20 m/pixel.
+            BandSet(["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B9", "B10", "B11"], 32),
+        ],
+        is_multitemporal=True,
+        ignore_when_parsing=False,
+    )
+    WORLDCOVER = ModalitySpec(
+        name="worldcover",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["B1"], 16)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    WORLDCEREAL = ModalitySpec(
+        name="worldcereal",
+        tile_resolution_factor=16,
+        band_sets=[
+            BandSet(
+                [
+                    "tc-annual-temporarycrops-classification",
+                    "tc-maize-main-irrigation-classification",
+                    "tc-maize-main-maize-classification",
+                    "tc-maize-second-irrigation-classification",
+                    "tc-maize-second-maize-classification",
+                    "tc-springcereals-springcereals-classification",
+                    "tc-wintercereals-irrigation-classification",
+                    "tc-wintercereals-wintercereals-classification",
+                ],
+                16,
+            )
+        ],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    SRTM = ModalitySpec(
+        name="srtm",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["srtm"], 16)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    OPENSTREETMAP = ModalitySpec(
+        name="openstreetmap",
+        tile_resolution_factor=16,
+        band_sets=[
+            BandSet(
+                [
+                    "aerialway_pylon",
+                    "aerodrome",
+                    "airstrip",
+                    "amenity_fuel",
+                    "building",
+                    "chimney",
+                    "communications_tower",
+                    "crane",
+                    "flagpole",
+                    "fountain",
+                    "generator_wind",
+                    "helipad",
+                    "highway",
+                    "leisure",
+                    "lighthouse",
+                    "obelisk",
+                    "observatory",
+                    "parking",
+                    "petroleum_well",
+                    "power_plant",
+                    "power_substation",
+                    "power_tower",
+                    "river",
+                    "runway",
+                    "satellite_dish",
+                    "silo",
+                    "storage_tank",
+                    "taxiway",
+                    "water_tower",
+                    "works",
+                ],
+                1,
+            )
+        ],
+        is_multitemporal=False,
+        ignore_when_parsing=True,
+    )
+    OPENSTREETMAP_RASTER = ModalitySpec(
+        name="openstreetmap_raster",
+        tile_resolution_factor=16,
+        band_sets=[
+            BandSet(
+                [
+                    "aerialway_pylon",
+                    "aerodrome",
+                    "airstrip",
+                    "amenity_fuel",
+                    "building",
+                    "chimney",
+                    "communications_tower",
+                    "crane",
+                    "flagpole",
+                    "fountain",
+                    "generator_wind",
+                    "helipad",
+                    "highway",
+                    "leisure",
+                    "lighthouse",
+                    "obelisk",
+                    "observatory",
+                    "parking",
+                    "petroleum_well",
+                    "power_plant",
+                    "power_substation",
+                    "power_tower",
+                    "river",
+                    "runway",
+                    "satellite_dish",
+                    "silo",
+                    "storage_tank",
+                    "taxiway",
+                    "water_tower",
+                    "works",
+                ],
+                4,
+            )
+        ],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    ERA5 = ModalitySpec(
+        name="era5",
+        # 9 km/pixel bands that we store at 150 m/pixel.
+        tile_resolution_factor=256,
+        band_sets=[
+            BandSet(
+                [
+                    "2m-temperature",
+                    "2m-dewpoint-temperature",
+                    "surface-pressure",
+                    "10m-u-component-of-wind",
+                    "10m-v-component-of-wind",
+                    "total-precipitation",
+                ],
+                256,
+            ),
+        ],
+        is_multitemporal=True,
+        ignore_when_parsing=True,
+    )
+    ERA5_10 = ModalitySpec(
+        name="era5_10",
+        # 9 km/pixel bands that we store at 2.56 km/pixel.
+        tile_resolution_factor=16,
+        band_sets=[
+            BandSet(
+                [
+                    "2m-temperature",
+                    "2m-dewpoint-temperature",
+                    "surface-pressure",
+                    "10m-u-component-of-wind",
+                    "10m-v-component-of-wind",
+                    "total-precipitation",
+                ],
+                4096,
+            ),
+        ],
+        is_multitemporal=True,
+        ignore_when_parsing=False,
+        image_tile_size_factor=-256,
+    )
+    LATLON = ModalitySpec(
+        name="latlon",
+        tile_resolution_factor=0,
+        band_sets=[BandSet(["lat", "lon"], 0)],
+        is_multitemporal=False,
+        ignore_when_parsing=True,
+    )
+    GSE = ModalitySpec(
+        name="gse",
+        tile_resolution_factor=16,
+        band_sets=[
+            BandSet(
+                [f"A{idx:02d}" for idx in range(64)],
+                16,
+            ),
+        ],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    CDL = ModalitySpec(
+        name="cdl",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["cdl"], 16)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    WORLDPOP = ModalitySpec(
+        name="worldpop",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["B1"], 16)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    WRI_CANOPY_HEIGHT_MAP = ModalitySpec(
+        name="wri_canopy_height_map",
+        tile_resolution_factor=16,
+        band_sets=[BandSet(["B1"], 16)],
+        is_multitemporal=False,
+        ignore_when_parsing=False,
+    )
+    @classmethod
+    def get(self, name: str) -> ModalitySpec:
+        """Get the ModalitySpec with the specified name."""
+        modality = getattr(Modality, name.upper())
+        assert modality.name == name
+        return modality
+    @classmethod
+    def values(self) -> list[ModalitySpec]:
+        """Get all of the ModalitySpecs."""
+        modalities = []
+        for k in dir(Modality):
+            modality = getattr(Modality, k)
+            if not isinstance(modality, ModalitySpec):
+                continue
+            modalities.append(modality)
+        return modalities
+    @classmethod
+    def names(self) -> list[str]:
+        """Get all of the modality names."""
+        return [modality.name for modality in self.values()]
+# Latlon and timestamps
+LATLON = ["lat", "lon"]
+TIMESTAMPS = ["day", "month", "year"]
+def get_modality_specs_from_names(names: list[str]) -> list[ModalitySpec]:
+    """Get the modality specs from the names."""
+    return [Modality.get(name) for name in names]

olmoearth_pretrain_minimal/olmoearth_pretrain_v1/utils/datatypes.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""Data structures for OlmoEarth Pretrain."""
+from __future__ import annotations
+from enum import Enum
+from typing import TYPE_CHECKING, Any, NamedTuple
+import torch
+from olmoearth_pretrain_minimal.olmoearth_pretrain_v1.utils.types import ArrayTensor
+class MaskValue(Enum):
+    """Masks can take 4 possible values.
+    ONLINE_ENCODER: The token is seen by the online encoder
+    TARGET_ENCODER_ONLY: The token is seen by the target encoder only
+    DECODER: The token is seen by the decoder only
+    MISSING: The token is missing
+    """
+    ONLINE_ENCODER = 0
+    TARGET_ENCODER_ONLY = 1
+    DECODER = 2
+    MISSING = 3
+class MaskedOlmoEarthSample(NamedTuple):
+    """A masked sample of the data from the OlmoEarth Pretrain dataset.
+    We always require sentinel2 data.
+    This is a namedtuple that contains the data for a single sample from the OlmoEarth Pretrain dataset.
+    latlon and timestamps are the same for all modalities.
+    For each modality. we have an ArrayTensor named by modality, and a mask for each modality named by modality_mask.
+    we also have a mask for the latlon called latlon_mask
+    """
+    timestamps: (
+        ArrayTensor  # [B, T, D=3], where D=[day, month, year] (months are zero indexed)
+    )
+    sentinel2_l2a: ArrayTensor | None = None
+    sentinel2_l2a_mask: ArrayTensor | None = None
+    sentinel1: ArrayTensor | None = None
+    sentinel1_mask: ArrayTensor | None = None
+    worldcover: ArrayTensor | None = None
+    worldcover_mask: ArrayTensor | None = None
+    latlon: ArrayTensor | None = None  # [B, 2]
+    latlon_mask: ArrayTensor | None = None
+    openstreetmap_raster: ArrayTensor | None = None
+    openstreetmap_raster_mask: ArrayTensor | None = None
+    srtm: ArrayTensor | None = None
+    srtm_mask: ArrayTensor | None = None
+    landsat: ArrayTensor | None = None
+    landsat_mask: ArrayTensor | None = None
+    naip: ArrayTensor | None = None
+    naip_mask: ArrayTensor | None = None
+    naip_10: ArrayTensor | None = None
+    naip_10_mask: ArrayTensor | None = None
+    gse: ArrayTensor | None = None
+    gse_mask: ArrayTensor | None = None
+    cdl: ArrayTensor | None = None
+    cdl_mask: ArrayTensor | None = None
+    worldpop: ArrayTensor | None = None
+    worldpop_mask: ArrayTensor | None = None
+    worldcereal: ArrayTensor | None = None
+    worldcereal_mask: ArrayTensor | None = None
+    wri_canopy_height_map: ArrayTensor | None = None
+    wri_canopy_height_map_mask: ArrayTensor | None = None
+    era5_10: ArrayTensor | None = None
+    era5_10_mask: ArrayTensor | None = None
+    def as_dict(self, return_none: bool = True) -> dict[str, Any]:
+        """Convert the namedtuple to a dictionary.
+        Returns:
+            Dictionary representation of the namedtuple.
+        """
+        return_dict = {}
+        for field in self._fields:
+            val = getattr(self, field)
+            if return_none:
+                return_dict[field] = val
+            else:
+                if val is not None:
+                    return_dict[field] = val
+        return return_dict
+    def unmask(self) -> MaskedOlmoEarthSample:
+        """Return an unmasked MaskedOlmoEarthSample.
+        All mask values are MaskValue.ONLINE_ENCODER except for MaskValue.MISSING,
+        which remain MISSING.
+        """
+        return_dict: dict[str, ArrayTensor] = {}
+        for key, val in self.as_dict().items():
+            if val is None:
+                continue
+            if key.endswith("mask"):
+                # 1s where it is missing, 0 elsewhere
+                all_but_missing = val == MaskValue.MISSING.value
+                return_dict[key] = val * all_but_missing
+            else:
+                return_dict[key] = val
+        return MaskedOlmoEarthSample(**return_dict)
+    @property
+    def modalities(self) -> list[str]:
+        """Get the present modalities in this instance of MaskedOlmoEarthSample."""
+        return [
+            field
+            for field in self._fields
+            if not field.endswith("_mask")
+            and field != "timestamps"
+            and getattr(self, field) is not None
+        ]
+    @staticmethod
+    def get_masked_modality_name(modality: str) -> str:
+        """Get the masked modality name."""
+        return f"{modality}_mask"
+    @staticmethod
+    def get_unmasked_modality_name(modality_mask_name: str) -> str:
+        """Get the unmasked modality name."""
+        return modality_mask_name.replace("_mask", "")
+    @classmethod
+    def from_olmoearthsample(
+        cls,
+        sample: Any,  # OlmoEarthSample - not available in minimal repo
+    ) -> MaskedOlmoEarthSample:
+        """Transforms a OlmoEarthSample into a MaskedOlmoEarthSample.
+        This function assumes modalities are uniformly missing.
+        """
+        masked_sample_dict = {}
+        for key, t in sample.as_dict(ignore_nones=False).items():
+            if key == "timestamps":
+                # lets assume timestamps is not None
+                masked_sample_dict[key] = t
+            else:
+                if t is None:
+                    masked_sample_dict[key] = None
+                    masked_sample_dict[
+                        MaskedOlmoEarthSample.get_masked_modality_name(key)
+                    ] = None
+                else:
+                    masked_sample_dict[key] = t
+                    masked_sample_dict[
+                        MaskedOlmoEarthSample.get_masked_modality_name(key)
+                    ] = (
+                        torch.ones(sample.shape(key, mask=False))
+                        * MaskValue.ONLINE_ENCODER.value
+                    )
+        return MaskedOlmoEarthSample(**masked_sample_dict)
+    @classmethod
+    def from_dict(cls, dict: dict[str, Any]) -> MaskedOlmoEarthSample:
+        """Create a MaskedOlmoEarthSample from a dictionary, creating empty tensors for missing modalities.
+        Args:
+            dict: Dictionary representation of the MaskedOlmoEarthSample.
+        """
+        return cls(**dict)