satcube 0.1.0-py3-none-any.whl → 0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of satcube might be problematic.

@@ -1,24 +1,160 @@
+ """Predict cloud masks for Sentinel-2 GeoTIFFs with the SEN2CloudEnsemble model.
+
+ The callable :pyfunc:`cloud_masking` accepts **either** a single ``.tif`` file
+ or a directory tree; in both cases it writes a masked copy of every image (and,
+ optionally, the binary mask) to *output*.
+
+ Example
+ -------
+ >>> from satcube.cloud_detection import cloud_masking
+ >>> cloud_masking("~/s2/input", "~/s2/output", device="cuda")
+ """
+
+ from __future__ import annotations
+
+ import time
+ from pathlib import Path
+ from typing import List
+
+ import mlstac
+ import numpy as np
+ import rasterio as rio
  import torch
 
- class LandsatCloudDetector(torch.nn.Module):
-     def __init__(self):
-         super().__init__()
+ from satcube.utils import DeviceManager, _reset_gpu
+
+
+ def cloud_masking(
+     input: str | Path,  # noqa: A002 (shadowing built-in is OK here)
+     output: str | Path,
+     *,
+     tile: int = 512,
+     pad: int = 64,
+     save_mask: bool = False,
+     device: str = "cpu",
+     max_pix_cpu: float = 7.0e7,
+ ) -> List[Path]:
+     """Write cloud-masked Sentinel-2 images.
+
+     Parameters
+     ----------
+     input
+         Path to a single ``.tif`` file **or** a directory containing them.
+     output
+         Destination directory (created if missing).
+     tile, pad
+         Tile size and padding (pixels) when tiling is required.
+     save_mask
+         If *True*, store the binary mask alongside the masked image.
+     device
+         Torch device for inference, e.g. ``"cpu"`` or ``"cuda:0"``.
+     max_pix_cpu
+         Tile images larger than this when running on CPU.
+
+     Returns
+     -------
+     list[pathlib.Path]
+         Paths to the generated masked images.
+     """
+     t_start = time.perf_counter()
+
+     src = Path(input).expanduser().resolve()
+     dst_dir = Path(output).expanduser().resolve()
+     dst_dir.mkdir(parents=True, exist_ok=True)
+
+     # Collect files to process -------------------------------------------------
+     tif_paths: list[Path]
+     if src.is_dir():
+         tif_paths = [p for p in src.rglob("*.tif")]
+     elif src.is_file() and src.suffix.lower() == ".tif":
+         tif_paths = [src]
+         src = src.parent  # for relative-path bookkeeping below
+     else:
+         raise ValueError(f"Input must be a .tif or directory, got: {src}")
+
+     if not tif_paths:
+         print(f"[cloud_masking] No .tif files found in {src}")
+         return []
+
+     experiment = mlstac.load("SEN2CloudEnsemble")
+     dm = DeviceManager(experiment, init_device=device)
+
+     masked_paths: list[Path] = []
+
+     # -------------------------------------------------------------------------
+     for idx, tif_path in enumerate(tif_paths, 1):
+         rel = tif_path.relative_to(src)
+         out_dir = dst_dir / rel.parent
+         out_dir.mkdir(parents=True, exist_ok=True)
+
+         mask_path = out_dir / f"{tif_path.stem}_cloudmask.tif"
+         masked_path = out_dir / f"{tif_path.stem}_masked.tif"
+
+         with rio.open(tif_path) as src_img:
+             profile = src_img.profile
+             h, w = src_img.height, src_img.width
+
+         mask_prof = profile.copy()
+         mask_prof.update(driver="GTiff", count=1, dtype="uint8", nodata=255)
+
+         do_tiling = (dm.device == "cuda") or (h * w > max_pix_cpu)
+         full_mask = np.full((h, w), 255, np.uint8)
+
+         t0 = time.perf_counter()
+
+         # ----------------------- inference -----------------------------------
+         if not do_tiling:  # full frame
+             with rio.open(tif_path) as src_img, torch.inference_mode():
+                 img = src_img.read().astype(np.float32) / 1e4
+                 h32, w32 = (h + 31) // 32 * 32, (w + 31) // 32 * 32
+                 pad_b, pad_r = h32 - h, w32 - w
+                 tensor = torch.from_numpy(img).unsqueeze(0)
+                 if pad_b or pad_r:
+                     tensor = torch.nn.functional.pad(tensor, (0, pad_r, 0, pad_b))
+                 mask = dm.model(tensor.to(dm.device)).squeeze(0)
+                 full_mask[:] = mask[..., :h, :w].cpu().numpy().astype(np.uint8)
+         else:  # tiled
+             with rio.open(tif_path) as src_img, torch.inference_mode():
+                 for y0 in range(0, h, tile):
+                     for x0 in range(0, w, tile):
+                         y0r, x0r = max(0, y0 - pad), max(0, x0 - pad)
+                         y1r, x1r = min(h, y0 + tile + pad), min(w, x0 + tile + pad)
+                         win = rio.windows.Window(x0r, y0r, x1r - x0r, y1r - y0r)
+
+                         patch = src_img.read(window=win).astype(np.float32) / 1e4
+                         tensor = torch.from_numpy(patch).unsqueeze(0).to(dm.device)
+                         mask = dm.model(tensor).squeeze(0).cpu().numpy().astype(np.uint8)
+
+                         y_in0 = pad if y0r else 0
+                         x_in0 = pad if x0r else 0
+                         y_in1 = mask.shape[0] - (pad if y1r < h else 0)
+                         x_in1 = mask.shape[1] - (pad if x1r < w else 0)
+                         core = mask[y_in0:y_in1, x_in0:x_in1]
+                         full_mask[y0 : y0 + core.shape[0], x0 : x0 + core.shape[1]] = core
+
+         # ----------------------- output --------------------------------------
+         if save_mask:
+             with rio.open(mask_path, "w", **mask_prof) as dst:
+                 dst.write(full_mask, 1)
+
+         with rio.open(tif_path) as src_img:
+             data = src_img.read()
+             img_prof = src_img.profile.copy()
+
+         masked = data.copy()
+         masked[:, full_mask != 0] = 65535
+         img_prof.update(dtype="uint16", nodata=65535)
 
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         # Define bit flags for clouds based on the
-         # Landsat QA band documentation
-         cloud_flags = (1 << 3) | (1 << 4) | (1 << 1)
+         with rio.open(masked_path, "w", **img_prof) as dst:
+             dst.write(masked)
 
-         ## Get the QA band
-         qa_band = x[6]
-         mask_band = x[:6].mean(axis=0)
-         mask_band[~torch.isnan(mask_band)] = 1
+         masked_paths.append(masked_path)
+         dt = time.perf_counter() - t0
+         print(f"[{idx}/{len(tif_paths)}] {rel} → done in {dt:.1f}s")
 
-         ## Create a cloud mask
-         cloud_mask = torch.bitwise_and(qa_band.int(), cloud_flags) == 0
-         cloud_mask = cloud_mask.float()
-         cloud_mask[cloud_mask == 0] = torch.nan
-         cloud_mask[cloud_mask == 0] = 1
-         final_mask = cloud_mask * mask_band
-         return final_mask
+         if dm.device == "cuda":
+             _reset_gpu()
 
+     total_time = time.perf_counter() - t_start
+     print(f"Processed {len(masked_paths)} image(s) in {total_time:.1f}s.")
+     return masked_paths
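
A minimal usage sketch for the cloud_masking entry point added above; the paths are placeholders, and save_mask=True simply enables the optional mask output described in the docstring:

from satcube.cloud_detection import cloud_masking

# Placeholder paths; the output directory is created if missing.
masked = cloud_masking(
    "~/s2/scenes",      # a single .tif or a directory tree
    "~/s2/masked",
    save_mask=True,     # also write the binary cloud mask per image
    device="cpu",
)
print(f"wrote {len(masked)} masked image(s)")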
@@ -0,0 +1,24 @@
+ import torch
+
+ class LandsatCloudDetector(torch.nn.Module):
+     def __init__(self):
+         super().__init__()
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         # Define bit flags for clouds based on the
+         # Landsat QA band documentation
+         cloud_flags = (1 << 3) | (1 << 4) | (1 << 1)
+
+         ## Get the QA band
+         qa_band = x[6]
+         mask_band = x[:6].mean(axis=0)
+         mask_band[~torch.isnan(mask_band)] = 1
+
+         ## Create a cloud mask
+         cloud_mask = torch.bitwise_and(qa_band.int(), cloud_flags) == 0
+         cloud_mask = cloud_mask.float()
+         cloud_mask[cloud_mask == 0] = torch.nan
+         cloud_mask[cloud_mask == 0] = 1
+         final_mask = cloud_mask * mask_band
+         return final_mask
+
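
A minimal sketch of exercising the relocated LandsatCloudDetector, assuming the input tensor stacks six spectral bands plus the Landsat QA band at index 6 (the random tensor below is placeholder data):

import torch

detector = LandsatCloudDetector()
x = torch.rand(7, 256, 256)   # 6 spectral bands + QA band (placeholder values)
mask = detector(x)            # 1.0 where clear and valid, NaN where cloudy or nodata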
satcube/download.py ADDED
@@ -0,0 +1,65 @@
+ import pathlib
+ import ee
+ import cubexpress
+ import pandas as pd
+
+
+ def download_data(
+     *,  # keyword-only
+     lon: float,
+     lat: float,
+     cloud_max: int = 40,
+     edge_size: int = 2_048,
+     start: str,
+     end: str,
+     output: str = "raw",
+     scale: int = 10,
+     nworks: int = 4,
+     mosaic: bool = True,
+     auto_init_gee: bool = True,
+ ) -> pd.DataFrame:
+     """
+     Download a Sentinel cube for (lon, lat) and return its metadata.
+
+     Parameters
+     ----------
+     lon, lat        Center point in degrees.
+     cloud_max       Max cloud cover (%).
+     edge_size       Square side length (m).
+     start, end      YYYY-MM-DD date range.
+     output          Folder for GeoTIFFs.
+     scale           Pixel size (m).
+     nworks          Parallel workers.
+     mosaic          Merge scenes per date.
+     auto_init_gee   Call ee.Initialize() if needed.
+
+     Returns
+     -------
+     pandas.DataFrame
+         Scene catalogue used for the request.
+     """
+     # EE ready
+     if auto_init_gee:
+         try:
+             ee.Initialize()
+         except ee.EEException:
+             ee.Authenticate(); ee.Initialize()
+
+     # Filter scenes
+     df = cubexpress.cloud_table(
+         lon=lon,
+         lat=lat,
+         edge_size=edge_size,
+         scale=scale,
+         cloud_max=cloud_max,
+         start=start,
+         end=end,
+     )
+
+     # Build requests + ensure dir
+     requests = cubexpress.table_to_requestset(df, mosaic=mosaic)
+     pathlib.Path(output).mkdir(parents=True, exist_ok=True)
+
+     # Download cube
+     cubexpress.get_cube(requests, output, nworks)
+     return df
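
A usage sketch for the keyword-only download_data added in satcube/download.py; the coordinates and date range are placeholders, the returned DataFrame is the scene catalogue, and the GeoTIFFs land in the output folder:

from satcube.download import download_data

# Placeholder point and date range; every argument is keyword-only.
catalogue = download_data(
    lon=-76.95,
    lat=-12.05,
    start="2022-01-01",
    end="2022-12-31",
    cloud_max=30,
    output="raw",
)
print(catalogue.head())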
@@ -0,0 +1,82 @@
+ import ee
+ import cubexpress
+ import pathlib
+ from typing import Optional
+ from datetime import datetime
+
+ def download_data(
+     lon: float,
+     lat: float,
+     cs_cdf: Optional[float] = 0.6,
+     buffer_size: Optional[int] = 1280,
+     start_date: Optional[str] = "2015-01-01",
+     end_date: Optional[str] = datetime.today().strftime('%Y-%m-%d'),
+     outfolder: Optional[str] = "raw/"
+ ) -> pathlib.Path:
+     """
+     Download Sentinel-2 imagery data using cubexpress and Earth Engine API.
+
+     Args:
+         lon (float): Longitude of the point of interest.
+         lat (float): Latitude of the point of interest.
+         cs_cdf (Optional[float]): Cloud mask threshold (default 0.6).
+         buffer_size (Optional[int]): Buffer size for image extraction (default 1280).
+         start_date (Optional[str]): Start date for image filtering (default "2015-01-01").
+         end_date (Optional[str]): End date for image filtering (default today's date).
+         outfolder (Optional[str]): Output folder to save images (default "raw/").
+
+     Returns:
+         pathlib.Path: Path to the folder where the data is stored.
+     """
+
+     # Initialize Earth Engine
+     ee.Initialize(project="ee-julius013199")
+
+     # Define point of interest
+     point = ee.Geometry.Point([lon, lat])
+
+     # Filter image collection by location and date
+     collection = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") \
+         .filterBounds(point) \
+         .filterDate(start_date, end_date)
+
+     # Get image IDs
+     image_ids = collection.aggregate_array('system:id').getInfo()
+
+     # Cloud mask function
+     def cloud_mask(image) -> ee.Image:
+         """Apply cloud mask to the image."""
+         return image.select('MSK_CLDPRB').lt(20)
+
+     # Apply cloud mask
+     collection = collection.map(cloud_mask)
+
+     # Generate geotransform for cubexpress
+     geotransform = cubexpress.lonlat2rt(lon=lon, lat=lat, edge_size=buffer_size, scale=10)
+
+     # Prepare requests for cubexpress
+     requests = [
+         cubexpress.Request(
+             id=f"s2test_{i}",
+             raster_transform=geotransform,
+             bands=["B4", "B3", "B2"],  # RGB bands
+             image=ee.Image(image_id).divide(10000)  # Adjust image scaling
+         )
+         for i, image_id in enumerate(image_ids)
+     ]
+
+     # Create request set
+     cube_requests = cubexpress.RequestSet(requestset=requests)
+
+     # Set output folder
+     output_path = pathlib.Path(outfolder)
+
+     # Download the data
+     cubexpress.getcube(
+         request=cube_requests,
+         output_path=output_path,
+         nworkers=4,
+         max_deep_level=5
+     )
+
+     return output_path
satcube/main.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
  import torch
 
  from satcube.dataclass import Sensor
- from satcube.utils import (aligned_s2, cloudmasking_s2, display_images,
+ from satcube.utils_old import (aligned_s2, cloudmasking_s2, display_images,
                              gapfilling_s2, intermediate_process, interpolate_s2,
                              metadata_s2, monthly_composites_s2, smooth_s2, super_s2)
 
@@ -252,7 +252,7 @@ class SatCube:
          out_table["folder"] = out_folder
 
          return out_table
-
+
      def monthly_composites_s2(
          self,
          table: Optional[pd.DataFrame],