cubexpress 0.1.4.tar.gz → 0.1.18.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

--- cubexpress-0.1.4/PKG-INFO
+++ cubexpress-0.1.18/PKG-INFO
@@ -1,13 +1,12 @@
 Metadata-Version: 2.1
 Name: cubexpress
-Version: 0.1.4
+Version: 0.1.18
 Summary: Efficient processing of cubic Earth-observation (EO) data.
 Home-page: https://github.com/andesdatacube/cubexpress
-License: MIT
 Keywords: earth-engine,sentinel-2,geospatial,eo,cube
 Author: Julio Contreras
 Author-email: contrerasnetk@gmail.com
-Requires-Python: >=3.9,<4.0
+Requires-Python: >=3.9
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -16,14 +15,15 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Topic :: Scientific/Engineering :: GIS
-Requires-Dist: earthengine-api (>=0.1.392)
-Requires-Dist: numpy (>=1.25.2)
-Requires-Dist: pandas (>=2.0.3)
+Requires-Dist: earthengine-api (>=1.5.12)
+Requires-Dist: numpy (>=2.0.2)
+Requires-Dist: pandas (>=2.2.2)
 Requires-Dist: pyarrow (>=14.0.0)
-Requires-Dist: pygeohash (>=1.2.0,<2.0.0)
+Requires-Dist: pydantic (>=2.11.4)
+Requires-Dist: pygeohash (>=1.2.0)
 Requires-Dist: pyproj (>=3.6.0)
 Requires-Dist: rasterio (>=1.3.9)
-Requires-Dist: utm (>=0.7.0,<0.9.0)
+Requires-Dist: utm (>=0.7.0)
 Project-URL: Documentation, https://andesdatacube.github.io/cubexpress
 Project-URL: Repository, https://github.com/andesdatacube/cubexpress
 Description-Content-Type: text/markdown
@@ -31,7 +31,7 @@ Description-Content-Type: text/markdown
 <h1></h1>
 
 <p align="center">
-  <img src="./docs/logo_cubexpress.png" width="39%">
+  <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
 </p>
 
 <p align="center">

--- cubexpress-0.1.4/README.md
+++ cubexpress-0.1.18/README.md
@@ -1,7 +1,7 @@
 <h1></h1>
 
 <p align="center">
-  <img src="./docs/logo_cubexpress.png" width="39%">
+  <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
 </p>
 
 <p align="center">

--- cubexpress-0.1.4/cubexpress/__init__.py
+++ cubexpress-0.1.18/cubexpress/__init__.py
@@ -1,13 +1,11 @@
 from cubexpress.conversion import lonlat2rt, geo2utm
 from cubexpress.geotyping import RasterTransform, Request, RequestSet
-from cubexpress.cloud_utils import cloud_table
+from cubexpress.cloud_utils import s2_table
 from cubexpress.cube import get_cube
 from cubexpress.request import table_to_requestset
+# import importlib.metadata
 
 
-
-# pyproj
-# Export the functions
 __all__ = [
     "lonlat2rt",
     "RasterTransform",
@@ -15,11 +13,7 @@ __all__ = [
     "RequestSet",
     "geo2utm",
     "get_cube",
-    "cloud_table",
+    "s2_table",
     "table_to_requestset"
 ]
-
-# Dynamic version import
-import importlib.metadata
-
-__version__ = importlib.metadata.version("cubexpress")
+# __version__ = importlib.metadata.version("cubexpress")
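
In 0.1.18 the public helper `cloud_table` is renamed to `s2_table`, and the dynamic `__version__` lookup is commented out (so `cubexpress.__version__` is no longer set at import time). A minimal migration sketch, not taken from the package docs:

```python
# Migration sketch for 0.1.4 -> 0.1.18 (illustrative):
# old: from cubexpress import cloud_table
from cubexpress import s2_table  # same role, now named for its Sentinel-2 focus
```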

--- /dev/null
+++ cubexpress-0.1.18/cubexpress/cache.py
@@ -0,0 +1,52 @@
+import hashlib
+import json
+import os
+import pathlib
+from typing import Final
+
+# Directory for storing cached metadata files (configurable via env var)
+_CACHE_DIR: Final[pathlib.Path] = pathlib.Path(
+    os.getenv("CUBEXPRESS_CACHE", "~/.cubexpress_cache")
+).expanduser()
+_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def _cache_key(
+    lon: float,
+    lat: float,
+    edge_size: int | tuple[int, int],
+    scale: int,
+    collection: str,
+) -> pathlib.Path:
+    """
+    Generates a deterministic file path for caching query results.
+
+    Hashes the query parameters to create a unique filename. Coordinates
+    are rounded to 4 decimals to ensure cache hits on equivalent locations.
+
+    Args:
+        lon (float): Longitude of the center point.
+        lat (float): Latitude of the center point.
+        edge_size (int | tuple[int, int]): Size of the ROI in pixels.
+        scale (int): Pixel resolution in meters.
+        collection (str): Earth Engine collection ID.
+
+    Returns:
+        pathlib.Path: Full path to the hashed .parquet cache file.
+    """
+    # Round coordinates to ~11 m precision to group nearby requests
+    lon_r, lat_r = round(lon, 4), round(lat, 4)
+
+    # Normalize edge_size to a tuple for consistent hashing
+    if isinstance(edge_size, int):
+        edge_tuple = (edge_size, edge_size)
+    else:
+        edge_tuple = edge_size
+
+    # Create a unique signature for this request configuration
+    signature = [lon_r, lat_r, edge_tuple, scale, collection]
+
+    # Use MD5 to generate a short, filesystem-friendly filename
+    raw = json.dumps(signature).encode("utf-8")
+    digest = hashlib.md5(raw).hexdigest()
+    return _CACHE_DIR / f"{digest}.parquet"
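
As a quick illustration of the cache-key behavior above (a hedged sketch, not part of the package's test suite): two nearby points that agree to four decimal places, with otherwise identical parameters, resolve to the same parquet path.

```python
# Hypothetical check of _cache_key determinism; the coordinates are made up.
from cubexpress.cache import _cache_key

k1 = _cache_key(-76.00001, 40.00002, 512, 10, "COPERNICUS/S2_HARMONIZED")
k2 = _cache_key(-76.0, 40.0, 512, 10, "COPERNICUS/S2_HARMONIZED")
assert k1 == k2  # both round to (-76.0, 40.0) and normalize 512 -> (512, 512)
```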

--- /dev/null
+++ cubexpress-0.1.18/cubexpress/cloud_utils.py
@@ -0,0 +1,267 @@
+from __future__ import annotations
+
+import datetime as dt
+import time
+import warnings
+import ee
+import pandas as pd
+from cubexpress.cache import _cache_key
+from cubexpress.geospatial import _square_roi
+
+warnings.filterwarnings('ignore', category=DeprecationWarning)
+
+
+# --- CONFIGURATION CONSTANTS ---
+S2_COLLECTION = "COPERNICUS/S2_HARMONIZED"
+S2_CLOUD_COLLECTION = "GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED"
+S2_BANDS = [
+    "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B9", "B10", "B11", "B12"
+]
+S2_PIXEL_SCALE = 10  # meters
+# -------------------------------
+
+def _cloud_table_single_range(
+    lon: float,
+    lat: float,
+    edge_size: int | tuple[int, int],
+    start: str,
+    end: str
+) -> pd.DataFrame:
+    """
+    Build a daily cloud-score table for a square Sentinel-2 footprint.
+
+    Queries Earth Engine for a specific date range, identifying which images
+    fully contain the ROI and filling missing cloud scores with daily means.
+
+    Args:
+        lon (float): Longitude of the center point.
+        lat (float): Latitude of the center point.
+        edge_size (int | tuple[int, int]): Side length of the square region
+            in Sentinel-2 pixels (10 m each).
+        start (str): ISO-8601 start date (inclusive), e.g. "2024-06-01".
+        end (str): ISO-8601 end date (inclusive).
+
+    Returns:
+        pd.DataFrame: A DataFrame with one row per image. Columns include:
+            * id: Sentinel-2 ID.
+            * cs_cdf: Cloud Score Plus CDF (0-1).
+            * date: Acquisition date (YYYY-MM-DD).
+            * inside: 1 if the image fully contains the ROI, 0 otherwise.
+
+        Note: Missing ``cs_cdf`` values are filled with the mean of the
+        same day if a full-coverage image is not available.
+
+    Raises:
+        ee.ee_exception.EEException: If Earth Engine fails for reasons other
+            than an empty collection (e.g., quota exceeded, bad request).
+    """
+    # Define ROI (bbox around point)
+    center = ee.Geometry.Point([lon, lat])
+    roi = _square_roi(lon, lat, edge_size, 10)
+
+    # Query S2
+    s2 = (
+        ee.ImageCollection(S2_COLLECTION)
+        .filterBounds(roi)
+        .filterDate(start, end)
+    )
+
+    # Cloud Score Plus collection
+    ic = (
+        s2
+        .linkCollection(
+            ee.ImageCollection(S2_CLOUD_COLLECTION),
+            ["cs_cdf"]
+        )
+        .select(["cs_cdf"])
+    )
+
+    # Identify images whose footprint contains the ROI
+    ids_inside = (
+        ic
+        .map(
+            lambda img: img.set(
+                'roi_inside_scene',
+                img.geometry().contains(roi, maxError=10)
+            )
+        )
+        .filter(ee.Filter.eq('roi_inside_scene', True))
+        .aggregate_array('system:index')
+        .getInfo()
+    )
+
+    # Sample the cloud score of each image over the ROI
+    try:
+        raw = ic.getRegion(
+            geometry=center,
+            scale=edge_size * 11  # heuristic: 10 m pixels plus a margin; assumes an int edge_size
+        ).getInfo()
+    except ee.ee_exception.EEException as e:
+        if "No bands in collection" in str(e):
+            return pd.DataFrame(
+                columns=["id", "longitude", "latitude", "time", "cs_cdf", "inside"]
+            )
+        raise
+
+    # Convert raw data to DataFrame
+    df_raw = (
+        pd.DataFrame(raw[1:], columns=raw[0])
+        .drop(columns=["longitude", "latitude"])
+        .assign(
+            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d")
+        )
+    )
+
+    # Mark images whose ROI is fully inside the scene
+    df_raw["inside"] = df_raw["id"].isin(set(ids_inside)).astype(int)
+
+    # Fill missing cloud scores with daily mean (mosaic approach)
+    df_raw['cs_cdf'] = df_raw.groupby('date').apply(
+        lambda group: group['cs_cdf'].transform(
+            lambda _: group[group['inside'] == 1]['cs_cdf'].iloc[0]
+            if (group['inside'] == 1).any()
+            else group['cs_cdf'].mean()
+        )
+    ).reset_index(drop=True)
+
+    return df_raw
+
+def s2_table(
+    lon: float,
+    lat: float,
+    edge_size: int | tuple[int, int],
+    start: str,
+    end: str,
+    max_cscore: float = 1.0,
+    min_cscore: float = 0.0,
+    cache: bool = False
+) -> pd.DataFrame:
+    """
+    Build (and cache) a per-day cloud table for the requested ROI.
+
+    The function checks an on-disk parquet cache keyed on location and
+    parameters. If parts of the requested date range are missing, it fetches
+    only those gaps from Earth Engine, merges them, updates the cache, and
+    finally filters by cloud-score thresholds.
+
+    Args:
+        lon (float): Longitude of the center point.
+        lat (float): Latitude of the center point.
+        edge_size (int | tuple[int, int]): Side length of the square region
+            in Sentinel-2 pixels (10 m each).
+        start (str): ISO-8601 start date, e.g. "2024-06-01".
+        end (str): ISO-8601 end date.
+        max_cscore (float, optional): Maximum allowed cloud score CDF (0.0 to 1.0).
+            Rows above this threshold are dropped. Defaults to 1.0.
+        min_cscore (float, optional): Minimum allowed cloud score CDF (0.0 to 1.0).
+            Defaults to 0.0.
+        cache (bool, optional): If True, enables on-disk parquet caching to
+            avoid re-fetching data for the same parameters. Defaults to False.
+
+    Returns:
+        pd.DataFrame: Filtered cloud table. The DataFrame carries useful
+            metadata in ``.attrs`` (bands, collection, scale, etc.) needed
+            by downstream functions.
+    """
+    cache_file = _cache_key(lon, lat, edge_size, S2_PIXEL_SCALE, S2_COLLECTION)
+
+    # Load cached data if present
+    if cache and cache_file.exists():
+        print("📂 Loading cached metadata...", end='', flush=True)
+        t0 = time.time()
+        df_cached = pd.read_parquet(cache_file)
+        have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()
+
+        cached_start = have_idx.min().date()
+        cached_end = have_idx.max().date()
+        elapsed = time.time() - t0
+
+        if (
+            dt.date.fromisoformat(start) >= cached_start
+            and dt.date.fromisoformat(end) <= cached_end
+        ):
+            print(f"\r✅ Loaded {len(df_cached)} images from cache ({elapsed:.2f}s)")
+            df_full = df_cached
+        else:
+            print(f"\r📂 Cache loaded ({len(df_cached)} images, {elapsed:.2f}s)")
+
+            # Identify missing segments and fetch only those
+            print("⏳ Fetching missing date ranges...", end='', flush=True)
+            t0 = time.time()
+            df_new_parts = []
+
+            if dt.date.fromisoformat(start) < cached_start:
+                a1, b1 = start, cached_start.isoformat()
+                df_new_parts.append(
+                    _cloud_table_single_range(
+                        lon=lon,
+                        lat=lat,
+                        edge_size=edge_size,
+                        start=a1,
+                        end=b1
+                    )
+                )
+            if dt.date.fromisoformat(end) > cached_end:
+                a2, b2 = cached_end.isoformat(), end
+                df_new_parts.append(
+                    _cloud_table_single_range(
+                        lon=lon,
+                        lat=lat,
+                        edge_size=edge_size,
+                        start=a2,
+                        end=b2
+                    )
+                )
+            df_new_parts = [df for df in df_new_parts if not df.empty]
+
+            if df_new_parts:
+                df_new = pd.concat(df_new_parts, ignore_index=True)
+                elapsed = time.time() - t0
+                print(f"\r✅ Fetched {len(df_new)} new images ({elapsed:.2f}s) ")
+
+                df_full = (
+                    pd.concat([df_cached, df_new], ignore_index=True)
+                    .sort_values("date", kind="mergesort")
+                )
+            else:
+                elapsed = time.time() - t0
+                print(f"\r✅ No new images needed ({elapsed:.2f}s) ")
+                df_full = df_cached
+    else:
+        print("⏳ Querying Earth Engine metadata...", end='', flush=True)
+        t0 = time.time()
+        df_full = _cloud_table_single_range(
+            lon=lon,
+            lat=lat,
+            edge_size=edge_size,
+            start=start,
+            end=end
+        )
+        elapsed = time.time() - t0
+        n_images = len(df_full)
+        date_range = f"{start} to {end}"
+        print(f"\r✅ Retrieved {n_images} images from {date_range} ({elapsed:.2f}s)")
+
+    # Save cache
+    if cache:
+        df_full.to_parquet(cache_file, compression="zstd")
+
+    # Filter by cloud cover and requested date window
+    result = (
+        df_full.query("@start <= date <= @end")
+        .query("@min_cscore <= cs_cdf <= @max_cscore")
+        .reset_index(drop=True)
+    )
+
+    # Attach metadata for downstream helpers
+    result.attrs.update(
+        {
+            "lon": lon,
+            "lat": lat,
+            "edge_size": edge_size,
+            "scale": S2_PIXEL_SCALE,
+            "bands": S2_BANDS,
+            "collection": S2_COLLECTION
+        }
+    )
+    return result
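
A minimal usage sketch for the new `s2_table` entry point, assuming `ee.Initialize()` has already been run against a valid Earth Engine project; the point, dates, and threshold are illustrative. Note that in Cloud Score Plus a `cs_cdf` near 1 means clear sky, so clear-scene filtering uses `min_cscore`.

```python
# Hedged example; requires Earth Engine authentication and network access.
import ee
import cubexpress

ee.Initialize()

table = cubexpress.s2_table(
    lon=-77.03, lat=-12.04,                # hypothetical point near Lima
    edge_size=256,                         # 256 x 256 Sentinel-2 pixels (10 m each)
    start="2024-01-01", end="2024-03-31",
    min_cscore=0.6,                        # keep mostly clear scenes (cs_cdf near 1 = clear)
    cache=True,                            # reuse the on-disk parquet cache between calls
)
print(len(table), "images; collection:", table.attrs["collection"])
```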

--- /dev/null
+++ cubexpress-0.1.18/cubexpress/conversion.py
@@ -0,0 +1,156 @@
+import utm
+from pyproj import CRS, Transformer
+from cubexpress.geotyping import RasterTransform
+
+def parse_edge_size(edge_size: int | tuple[int, int]) -> tuple[int, int]:
+    """
+    Parse an edge_size input into a (width, height) tuple.
+
+    Args:
+        edge_size: An int for a square output, or a (width, height) tuple in pixels.
+
+    Returns:
+        tuple[int, int]: (width, height) in pixels.
+
+    Raises:
+        ValueError: If the tuple length != 2 or any value <= 0.
+    """
+    if isinstance(edge_size, int):
+        if edge_size <= 0:
+            raise ValueError(f"edge_size must be positive, got {edge_size}")
+        return (edge_size, edge_size)
+    else:
+        if len(edge_size) != 2:
+            raise ValueError(f"edge_size tuple must have 2 elements, got {len(edge_size)}")
+        width, height = edge_size
+        if width <= 0 or height <= 0:
+            raise ValueError(f"edge_size values must be positive, got {edge_size}")
+        return (width, height)
+
+def geo2utm(
+    lon: float,
+    lat: float
+) -> tuple[float, float, str]:
+    """
+    Converts longitude/latitude coordinates to UTM coordinates and returns the EPSG code.
+
+    Args:
+        lon (float): Longitude in decimal degrees.
+        lat (float): Latitude in decimal degrees.
+
+    Returns:
+        tuple[float, float, str]: UTM coordinates (x, y) in meters and EPSG code as string.
+
+    Raises:
+        utm.OutOfRangeError: If coordinates are outside the valid UTM range.
+    """
+    x, y, zone, _ = utm.from_latlon(lat, lon)
+    epsg_code = f"326{zone:02d}" if lat >= 0 else f"327{zone:02d}"
+    return float(x), float(y), f"EPSG:{epsg_code}"
+
+
+def lonlat2rt_utm_or_ups(
+    lon: float,
+    lat: float
+) -> tuple[float, float, str]:
+    """
+    Calculate UTM coordinates using pyproj (fallback for geo2utm).
+
+    Uses standard UTM zones for all latitudes, matching GEE behavior.
+    This method is more robust than the utm library and works globally.
+
+    Note:
+        UTM is designed for [-80°, 84°] but works globally with
+        acceptable distortions for small tiles.
+
+    Args:
+        lon (float): Longitude in decimal degrees.
+        lat (float): Latitude in decimal degrees.
+
+    Returns:
+        tuple[float, float, str]: UTM coordinates (x, y) in meters and EPSG code as string.
+    """
+    zone = int((lon + 180) // 6) + 1
+    epsg_code = 32600 + zone if lat >= 0 else 32700 + zone
+    crs = CRS.from_epsg(epsg_code)
+
+    to_xy = Transformer.from_crs(4326, crs, always_xy=True)
+    x, y = to_xy.transform(lon, lat)
+
+    return float(x), float(y), f"EPSG:{epsg_code}"
+
+
+def lonlat2rt(
+    lon: float,
+    lat: float,
+    edge_size: int | tuple[int, int],
+    scale: int
+) -> RasterTransform:
+    """
+    Generates a ``RasterTransform`` for a given point by converting geographic (lon, lat)
+    coordinates to a UTM projection and building the necessary geotransform metadata.
+
+    This function:
+    1. Converts the input (lon, lat) to UTM coordinates using :func:`geo2utm`.
+    2. If that fails (e.g., near the poles), falls back to a pyproj-based calculation.
+    3. Defines the extent of the raster in UTM meters based on the specified dimensions
+       and ``scale`` (meters per pixel).
+    4. Sets the Y-scale to be negative (``-scale``) because geospatial images typically
+       place the origin at the top-left corner, giving a downward Y axis.
+
+    Args:
+        lon (float): Longitude in decimal degrees.
+        lat (float): Latitude in decimal degrees.
+        edge_size (int | tuple[int, int]): Size of the output raster.
+            If int, creates a square (width=height=edge_size).
+            If tuple, specifies (width, height) in pixels.
+        scale (int): Spatial resolution in meters per pixel.
+
+    Returns:
+        RasterTransform: A Pydantic model containing:
+            - ``crs``: The EPSG code in the form ``"EPSG:XYZ"``,
+            - ``geotransform``: A dictionary with the affine transform parameters,
+            - ``width`` and ``height``.
+
+    Examples:
+        Square raster:
+
+        >>> rt = cubexpress.lonlat2rt(
+        ...     lon=-76.0, lat=40.0,
+        ...     edge_size=512, scale=30
+        ... )
+        >>> print(rt.width, rt.height)
+        512 512
+
+        Rectangular raster:
+
+        >>> rt = cubexpress.lonlat2rt(
+        ...     lon=-76.0, lat=40.0,
+        ...     edge_size=(1024, 512), scale=30
+        ... )
+        >>> print(rt.width, rt.height)
+        1024 512
+    """
+    try:
+        x, y, crs = geo2utm(lon, lat)
+    except Exception:
+        x, y, crs = lonlat2rt_utm_or_ups(lon, lat)
+
+    # Parse edge_size
+    width, height = parse_edge_size(edge_size)
+
+    half_width = (width * scale) / 2
+    half_height = (height * scale) / 2
+
+    geotransform = dict(
+        scaleX=scale,
+        shearX=0,
+        translateX=x - half_width,
+        scaleY=-scale,
+        shearY=0,
+        translateY=y + half_height,
+    )
+
+    return RasterTransform(
+        crs=crs, geotransform=geotransform, width=width, height=height
+    )
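
A hedged worked check of the geotransform arithmetic in `lonlat2rt` (assuming ``geotransform`` is exposed as the dict built above): the raster origin sits half the extent west and north of the projected center point.

```python
# Illustrative check, not from the package's test suite.
from cubexpress.conversion import geo2utm, lonlat2rt

x, y, crs = geo2utm(lon=-76.0, lat=40.0)  # zone 18 north -> EPSG:32618
rt = lonlat2rt(lon=-76.0, lat=40.0, edge_size=512, scale=30)

assert rt.crs == crs
assert rt.geotransform["translateX"] == x - (512 * 30) / 2  # origin west of center
assert rt.geotransform["translateY"] == y + (512 * 30) / 2  # origin north of center
```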