cubexpress 0.1.4__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cubexpress
3
- Version: 0.1.4
3
+ Version: 0.1.21
4
4
  Summary: Efficient processing of cubic Earth-observation (EO) data.
5
5
  Home-page: https://github.com/andesdatacube/cubexpress
6
- License: MIT
7
6
  Keywords: earth-engine,sentinel-2,geospatial,eo,cube
8
7
  Author: Julio Contreras
9
8
  Author-email: contrerasnetk@gmail.com
10
- Requires-Python: >=3.9,<4.0
9
+ Requires-Python: >=3.9
11
10
  Classifier: License :: OSI Approved :: MIT License
12
11
  Classifier: Programming Language :: Python :: 3
13
12
  Classifier: Programming Language :: Python :: 3.9
@@ -16,14 +15,16 @@ Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Programming Language :: Python :: 3 :: Only
18
17
  Classifier: Topic :: Scientific/Engineering :: GIS
19
- Requires-Dist: earthengine-api (>=0.1.392)
20
- Requires-Dist: numpy (>=1.25.2)
21
- Requires-Dist: pandas (>=2.0.3)
18
+ Requires-Dist: earthengine-api (>=1.5.12)
19
+ Requires-Dist: numpy (>=1.22.4,<2.0)
20
+ Requires-Dist: pandas (>=2.0.0)
22
21
  Requires-Dist: pyarrow (>=14.0.0)
23
- Requires-Dist: pygeohash (>=1.2.0,<2.0.0)
22
+ Requires-Dist: pydantic (>=2.0.0)
23
+ Requires-Dist: pygeohash (>=1.2.0)
24
24
  Requires-Dist: pyproj (>=3.6.0)
25
25
  Requires-Dist: rasterio (>=1.3.9)
26
- Requires-Dist: utm (>=0.7.0,<0.9.0)
26
+ Requires-Dist: tqdm (>=4.65.0)
27
+ Requires-Dist: utm (>=0.7.0)
27
28
  Project-URL: Documentation, https://andesdatacube.github.io/cubexpress
28
29
  Project-URL: Repository, https://github.com/andesdatacube/cubexpress
29
30
  Description-Content-Type: text/markdown
@@ -31,7 +32,7 @@ Description-Content-Type: text/markdown
31
32
  <h1></h1>
32
33
 
33
34
  <p align="center">
34
- <img src="./docs/logo_cubexpress.png" width="39%">
35
+ <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
35
36
  </p>
36
37
 
37
38
  <p align="center">
@@ -1,7 +1,7 @@
1
1
  <h1></h1>
2
2
 
3
3
  <p align="center">
4
- <img src="./docs/logo_cubexpress.png" width="39%">
4
+ <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
5
5
  </p>
6
6
 
7
7
  <p align="center">
@@ -0,0 +1,34 @@
1
+ """
2
+ CubExpress - Efficient Earth Engine data download and processing.
3
+
4
+ Main components:
5
+ - lonlat2rt: Convert coordinates to raster transforms
6
+ - s2_table: Query Sentinel-2 metadata with cloud scores
7
+ - table_to_requestset: Build request sets from metadata
8
+ - get_cube: Download Earth Engine data cubes
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from cubexpress.cloud_utils import s2_table
14
+ from cubexpress.conversion import geo2utm, lonlat2rt
15
+ from cubexpress.cube import get_cube
16
+ from cubexpress.geotyping import RasterTransform, Request, RequestSet
17
+ from cubexpress.request import table_to_requestset
18
+
19
+ __all__ = [
20
+ "lonlat2rt",
21
+ "geo2utm",
22
+ "RasterTransform",
23
+ "Request",
24
+ "RequestSet",
25
+ "s2_table",
26
+ "table_to_requestset",
27
+ "get_cube",
28
+ ]
29
+
30
+ try:
31
+ from importlib.metadata import version
32
+ __version__ = version("cubexpress")
33
+ except Exception:
34
+ __version__ = "0.0.0-dev"
@@ -0,0 +1,76 @@
1
+ """Caching utilities for Earth Engine query results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import pathlib
8
+
9
+ from cubexpress.config import CACHE_DIR
10
+
11
+ CACHE_DIR.mkdir(exist_ok=True, parents=True)
12
+
13
+
14
def _cache_key(
    lon: float,
    lat: float,
    edge_size: int | tuple[int, int],
    scale: int,
    collection: str,
) -> pathlib.Path:
    """
    Build a deterministic cache file path for a set of query parameters.

    Coordinates are rounded to 4 decimal places (~11 m precision) so that
    effectively identical locations share one cache entry.

    Args:
        lon: Longitude of center point
        lat: Latitude of center point
        edge_size: ROI size in pixels
        scale: Pixel resolution in meters
        collection: Earth Engine collection ID

    Returns:
        Path to hashed .parquet cache file
    """
    # Normalize the ROI size to a (width, height) pair so an int and its
    # equivalent tuple hash to the same key.
    if isinstance(edge_size, int):
        normalized_edge = (edge_size, edge_size)
    else:
        normalized_edge = tuple(edge_size)

    signature = [
        round(lon, 4),
        round(lat, 4),
        normalized_edge,
        scale,
        collection,
    ]

    payload = json.dumps(signature, sort_keys=True).encode("utf-8")
    return CACHE_DIR / f"{hashlib.md5(payload).hexdigest()}.parquet"
51
+
52
+
53
def clear_cache() -> int:
    """
    Remove all cached query results.

    Returns:
        Number of files deleted
    """
    # Materialize first so the count is fixed before deletion begins.
    cached_files = list(CACHE_DIR.glob("*.parquet"))
    for path in cached_files:
        path.unlink()
    return len(cached_files)
65
+
66
+
67
def get_cache_size() -> tuple[int, int]:
    """
    Calculate total cache size.

    Returns:
        Tuple of (file_count, total_bytes)
    """
    file_count = 0
    total_bytes = 0
    for entry in CACHE_DIR.glob("*.parquet"):
        file_count += 1
        total_bytes += entry.stat().st_size
    return file_count, total_bytes
@@ -0,0 +1,271 @@
1
from __future__ import annotations

import datetime as dt
import time
import warnings

import ee
import pandas as pd

from cubexpress.cache import _cache_key
from cubexpress.geospatial import _square_roi

# NOTE(review): presumably silences pandas/earthengine DeprecationWarnings so
# the \r-based progress output below stays readable — confirm this is intended
# module-wide (it affects every caller's warning state).
warnings.filterwarnings('ignore', category=DeprecationWarning)


# --- CONFIGURATION CONSTANTS ---
# Earth Engine collection queried for Sentinel-2 imagery metadata.
S2_COLLECTION = "COPERNICUS/S2_HARMONIZED"
# Companion collection holding Cloud Score Plus per-image cloud scores.
S2_CLOUD_COLLECTION = "GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED"
# Sentinel-2 spectral bands exposed via DataFrame .attrs for downstream use.
S2_BANDS = [
    "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B9", "B10", "B11", "B12"
]
S2_PIXEL_SCALE = 10  # meters
# -------------------------------
24
+
25
def _cloud_table_single_range(
    lon: float,
    lat: float,
    edge_size: int | tuple[int, int],
    start: str,
    end: str
) -> pd.DataFrame:
    """
    Build a daily cloud-score table for a square Sentinel-2 footprint.

    Query Earth Engine for a specific date range, identifying which images
    fully contain the ROI and filling missing cloud scores with daily means.

    Args:
        lon (float): Longitude of the center point.
        lat (float): Latitude of the center point.
        edge_size (int | tuple[int, int]): Side length of the square region
            in Sentinel-2 pixels (10 m each).
        start (str): ISO-8601 start date (inclusive), e.g. "2024-06-01".
        end (str): ISO-8601 end date. NOTE(review): ee.filterDate treats the
            end date as exclusive — confirm callers expect that.

    Returns:
        pd.DataFrame: A DataFrame with one row per image. Columns include:
            * id: Sentinel-2 ID.
            * cs_cdf: Cloud Score Plus CDF (0—1).
            * date: Acquisition date (YYYY-MM-DD).
            * inside: 1 if the image fully contains the ROI, 0 otherwise.

            Note: Every row of a given date shares one ``cs_cdf`` value —
            the score of the first full-coverage image of that day if one
            exists, otherwise the daily mean (mosaic approach).

    Raises:
        ee.ee_exception.EEException: If Earth Engine fails for reasons other
            than an empty collection (e.g., quota exceeded, bad request).
    """
    # Define ROI (bbox around point)
    center = ee.Geometry.Point([lon, lat])
    roi = _square_roi(lon, lat, edge_size, 10)

    # Query S2
    s2 = (
        ee.ImageCollection(S2_COLLECTION)
        .filterBounds(roi)
        .filterDate(start, end)
    )

    # Cloud Score Plus collection
    ic = (
        s2
        .linkCollection(
            ee.ImageCollection(S2_CLOUD_COLLECTION),
            ["cs_cdf"]
        )
        .select(["cs_cdf"])
    )

    # Identify images whose footprint contains the ROI
    ids_inside = (
        ic
        .map(
            lambda img: img.set(
                'roi_inside_scene',
                img.geometry().contains(roi, maxError=10)
            )
        )
        .filter(ee.Filter.eq('roi_inside_scene', True))
        .aggregate_array('system:index')
        .getInfo()
    )

    # FIX: the previous `edge_size * 11` crashed (or replicated the tuple)
    # when edge_size was a (width, height) pair; use the longest side so the
    # sampling footprint still covers the whole ROI.
    edge_px = max(edge_size) if isinstance(edge_size, (tuple, list)) else edge_size

    # Generate % cloud of each image over the ROI
    try:
        raw = ic.getRegion(
            geometry=center,
            scale=edge_px * 11  # 10 m pixels plus margin (it's a tricky calculation)
        ).getInfo()
    except ee.ee_exception.EEException as e:
        if "No bands in collection" in str(e):
            # Empty collection for this range: return an empty, well-typed table.
            return pd.DataFrame(
                columns=["id", "longitude", "latitude", "time", "cs_cdf", "inside"]
            )
        raise

    # Convert raw data to DataFrame (first row of getRegion output is the header)
    df_raw = (
        pd.DataFrame(raw[1:], columns=raw[0])
        .drop(columns=["longitude", "latitude"])
        .assign(
            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d")
        )
    )

    # Mark images whose ROI is fully inside the scene
    df_raw["inside"] = df_raw["id"].isin(set(ids_inside)).astype(int)

    def _daily_fill(group: pd.DataFrame) -> float:
        """Score of the first full-coverage image of the day, else daily mean."""
        inside_scores = group.loc[group["inside"] == 1, "cs_cdf"]
        return inside_scores.iloc[0] if not inside_scores.empty else group["cs_cdf"].mean()

    # Assign one cloud score per day via an explicit date->value map. This is
    # equivalent to the old groupby().apply(transform).reset_index() chain but
    # does not rely on positional row alignment and avoids the deprecated
    # DataFrameGroupBy.apply-on-grouping-columns pattern.
    if not df_raw.empty:
        fill_by_date = {day: _daily_fill(grp) for day, grp in df_raw.groupby("date")}
        df_raw["cs_cdf"] = df_raw["date"].map(fill_by_date)

    return df_raw
130
+
131
def s2_table(
    lon: float,
    lat: float,
    edge_size: int | tuple[int, int],
    start: str,
    end: str,
    max_cscore: float = 1.0,
    min_cscore: float = 0.0,
    cache: bool = False
) -> pd.DataFrame:
    """
    Build (and cache) a per-day cloud-table for the requested ROI.

    The function checks an on-disk parquet cache keyed on location and
    parameters. If parts of the requested date-range are missing, it fetches
    only those gaps from Earth Engine, merges them, updates the cache, and
    finally filters by cloud score thresholds.

    Args:
        lon (float): Longitude of the center point.
        lat (float): Latitude of the center point.
        edge_size (int | tuple[int, int]): Side length of the square region
            in Sentinel-2 pixels (10 m each).
        start (str): ISO-8601 start date, e.g. "2024-06-01".
        end (str): ISO-8601 end date.
        max_cscore (float, optional): Maximum allowed cloud score CDF (0.0 to 1.0).
            Rows above this threshold are dropped. Defaults to 1.0.
        min_cscore (float, optional): Minimum allowed cloud score CDF (0.0 to 1.0).
            Defaults to 0.0.
        cache (bool, optional): If True, enables on-disk parquet caching to
            avoid re-fetching data for the same parameters. Defaults to False.

    Returns:
        pd.DataFrame: Filtered cloud table. The DataFrame contains useful
            metadata in ``.attrs`` (bands, collection, scale, etc.) needed
            for downstream functions.
    """
    cache_file = _cache_key(lon, lat, edge_size, S2_PIXEL_SCALE, S2_COLLECTION)

    # Load cached data if present
    if cache and cache_file.exists():
        print("📂 Loading cached metadata...", end='', flush=True)
        t0 = time.time()
        df_cached = pd.read_parquet(cache_file)
        have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()

        cached_start = have_idx.min().date()
        cached_end = have_idx.max().date()
        elapsed = time.time() - t0

        if (
            dt.date.fromisoformat(start) >= cached_start
            and dt.date.fromisoformat(end) <= cached_end
        ):
            # Cache fully covers the requested window — no EE round trip.
            print(f"\r✅ Loaded {len(df_cached)} images from cache ({elapsed:.2f}s)")
            df_full = df_cached
        else:
            print(f"\r📂 Cache loaded ({len(df_cached)} images, {elapsed:.2f}s)")

            # Identify missing segments and fetch only those.
            print("⏳ Fetching missing date ranges...", end='', flush=True)
            t0 = time.time()
            df_new_parts = []

            # Gap before the cached window: [start, cached_start)
            if dt.date.fromisoformat(start) < cached_start:
                df_new_parts.append(
                    _cloud_table_single_range(
                        lon=lon,
                        lat=lat,
                        edge_size=edge_size,
                        start=start,
                        end=cached_start.isoformat()
                    )
                )
            # Gap after the cached window: [cached_end, end]
            if dt.date.fromisoformat(end) > cached_end:
                df_new_parts.append(
                    _cloud_table_single_range(
                        lon=lon,
                        lat=lat,
                        edge_size=edge_size,
                        start=cached_end.isoformat(),
                        end=end
                    )
                )
            df_new_parts = [df for df in df_new_parts if not df.empty]

            if df_new_parts:
                df_new = pd.concat(df_new_parts, ignore_index=True)
                elapsed = time.time() - t0
                print(f"\r✅ Fetched {len(df_new)} new images ({elapsed:.2f}s) ")

                # FIX: the later-gap fetch starts at cached_end (inclusive), so
                # images already cached for that day are returned again — drop
                # duplicates by image id before merging.
                df_full = (
                    pd.concat([df_cached, df_new], ignore_index=True)
                    .drop_duplicates(subset="id", keep="first")
                    .sort_values("date", kind="mergesort")
                )
            else:
                elapsed = time.time() - t0
                print(f"\r✅ No new images needed ({elapsed:.2f}s) ")
                df_full = df_cached
    else:
        # No usable cache: query the whole window in one go.
        print("⏳ Querying Earth Engine metadata...", end='', flush=True)
        t0 = time.time()
        df_full = _cloud_table_single_range(
            lon=lon,
            lat=lat,
            edge_size=edge_size,
            start=start,
            end=end
        )
        elapsed = time.time() - t0
        n_images = len(df_full)
        actual_start = df_full['date'].min()
        actual_end = df_full['date'].max()
        print(f"\r✅ Retrieved {n_images} images from {actual_start} to {actual_end} ({elapsed:.2f}s)")

    # Save cache
    if cache:
        df_full.to_parquet(cache_file, compression="zstd")

    # Filter by cloud cover and requested date window
    result = (
        df_full.query("@start <= date <= @end")
        .query("@min_cscore <= cs_cdf <= @max_cscore")
        .reset_index(drop=True)
    )

    # Attach metadata for downstream helpers
    result.attrs.update(
        {
            "lon": lon,
            "lat": lat,
            "edge_size": edge_size,
            "scale": S2_PIXEL_SCALE,
            "bands": S2_BANDS,
            "collection": S2_COLLECTION
        }
    )
    return result
@@ -0,0 +1,147 @@
1
+ """Coordinate conversion and raster transform utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import utm
6
+ from pyproj import CRS, Transformer
7
+
8
+ from cubexpress.exceptions import ValidationError
9
+ from cubexpress.geotyping import RasterTransform
10
+
11
+
12
+ def parse_edge_size(edge_size: int | tuple[int, int]) -> tuple[int, int]:
13
+ """
14
+ Parse edge_size input into (width, height) tuple.
15
+
16
+ Args:
17
+ edge_size: Size specification (int for square, tuple for rectangle)
18
+
19
+ Returns:
20
+ Tuple of (width, height) in pixels
21
+
22
+ Raises:
23
+ ValidationError: If input is invalid
24
+ """
25
+ if isinstance(edge_size, int):
26
+ if edge_size <= 0:
27
+ raise ValidationError(f"edge_size must be positive, got {edge_size}")
28
+ return (edge_size, edge_size)
29
+
30
+ if len(edge_size) != 2:
31
+ raise ValidationError(
32
+ f"edge_size tuple must have 2 elements, got {len(edge_size)}"
33
+ )
34
+
35
+ width, height = edge_size
36
+ if width <= 0 or height <= 0:
37
+ raise ValidationError(
38
+ f"edge_size values must be positive, got {edge_size}"
39
+ )
40
+
41
+ return (width, height)
42
+
43
+
44
def geo2utm(lon: float, lat: float) -> tuple[float, float, str]:
    """
    Convert lat/lon to UTM coordinates and EPSG code.

    Uses the utm library for standard conversion.

    Args:
        lon: Longitude in decimal degrees
        lat: Latitude in decimal degrees

    Returns:
        Tuple of (x, y, epsg_code) where EPSG code is formatted as 'EPSG:XXXXX'

    Raises:
        utm.OutOfRangeError: If coordinates are outside valid UTM range
    """
    easting, northing, zone_number, _zone_letter = utm.from_latlon(lat, lon)
    # EPSG 326xx is WGS84/UTM northern hemisphere; 327xx is southern.
    hemisphere = "326" if lat >= 0 else "327"
    return float(easting), float(northing), f"EPSG:{hemisphere}{zone_number:02d}"
63
+
64
+
65
def lonlat2rt_utm_or_ups(lon: float, lat: float) -> tuple[float, float, str]:
    """
    Calculate UTM coordinates using pyproj (fallback for geo2utm).

    This method is more robust than the utm library and works globally,
    including near the poles. Uses standard UTM zones for all latitudes
    to match Google Earth Engine behavior.

    Args:
        lon: Longitude in decimal degrees
        lat: Latitude in decimal degrees

    Returns:
        Tuple of (x, y, epsg_code)
    """
    # FIX: wrap the longitude into [0, 360) before binning so that
    # lon == 180 maps to zone 1 instead of the invalid "zone 61"
    # (EPSG:32661 is actually UPS North, not a UTM zone).
    zone = int(((lon + 180.0) % 360.0) // 6) + 1
    epsg_code = 32600 + zone if lat >= 0 else 32700 + zone
    crs = CRS.from_epsg(epsg_code)

    transformer = Transformer.from_crs(4326, crs, always_xy=True)
    x, y = transformer.transform(lon, lat)

    return float(x), float(y), f"EPSG:{epsg_code}"
88
+
89
+
90
def lonlat2rt(
    lon: float,
    lat: float,
    edge_size: int | tuple[int, int],
    scale: int
) -> RasterTransform:
    """
    Generate a RasterTransform from geographic coordinates.

    Converts (lon, lat) to UTM projection and builds geospatial metadata
    including affine transformation parameters. The Y-scale is negative
    because raster images have their origin at the top-left corner.

    Args:
        lon: Longitude in decimal degrees
        lat: Latitude in decimal degrees
        edge_size: Output raster size
            - int: creates square (width=height=edge_size)
            - tuple: specifies (width, height) in pixels
        scale: Spatial resolution in meters per pixel

    Returns:
        RasterTransform with CRS, geotransform, and dimensions

    Examples:
        >>> rt = lonlat2rt(lon=-76.0, lat=40.0, edge_size=512, scale=30)
        >>> print(rt.width, rt.height)
        512 512

        >>> rt = lonlat2rt(lon=-76.0, lat=40.0, edge_size=(1024, 512), scale=30)
        >>> print(rt.width, rt.height)
        1024 512
    """
    # Project the center point; fall back to the pyproj-based conversion
    # when the utm library cannot handle the location.
    try:
        center_x, center_y, crs = geo2utm(lon, lat)
    except Exception:
        center_x, center_y, crs = lonlat2rt_utm_or_ups(lon, lat)

    width, height = parse_edge_size(edge_size)

    # The raster origin is the upper-left corner: half the extent to the
    # left of the center and half above it.
    origin_x = center_x - (width * scale) / 2
    origin_y = center_y + (height * scale) / 2

    return RasterTransform(
        crs=crs,
        geotransform={
            "scaleX": scale,
            "shearX": 0,
            "translateX": origin_x,
            "scaleY": -scale,
            "shearY": 0,
            "translateY": origin_y,
        },
        width=width,
        height=height
    )