cubexpress 0.1.8.tar.gz → 0.1.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cubexpress
-Version: 0.1.8
+Version: 0.1.9
 Summary: Efficient processing of cubic Earth-observation (EO) data.
 Home-page: https://github.com/andesdatacube/cubexpress
 License: MIT

cubexpress/__init__.py

@@ -1,6 +1,6 @@
 from cubexpress.conversion import lonlat2rt, geo2utm
 from cubexpress.geotyping import RasterTransform, Request, RequestSet
-from cubexpress.cloud_utils import cloud_table
+from cubexpress.cloud_utils import s2_cloud_table
 from cubexpress.cube import get_cube
 from cubexpress.request import table_to_requestset
 
@@ -15,7 +15,7 @@ __all__ = [
     "RequestSet",
     "geo2utm",
     "get_cube",
-    "cloud_table",
+    "s2_cloud_table",
     "table_to_requestset"
 ]
 
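
The public entry point changes name in 0.1.9, so downstream imports need a one-line update. A minimal before/after sketch, assuming nothing else about the re-exports changed:

    # cubexpress 0.1.8
    from cubexpress import cloud_table

    # cubexpress 0.1.9
    from cubexpress import s2_cloud_table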

cubexpress/cloud_utils.py

@@ -27,25 +27,30 @@ def _cloud_table_single_range(
     start: str,
     end: str
 ) -> pd.DataFrame:
-    """Return raw cloud-table rows for a single *start–end* interval.
+    """
+    Build a daily cloud-score table for a square Sentinel-2 footprint.
 
     Parameters
     ----------
-    lon, lat
-        Centre coordinates in decimal degrees.
-    edge_size, scale
-        ROI size in pixels (*edge_size*) and pixel resolution in metres
-        (*scale*), fed into :pyfunc:`cubexpress.geospatial._square_roi`.
-    start, end
-        ISO-dates (``YYYY-MM-DD``) delimiting the query.
-    collection
-        Sentinel-2 collection name to query.
+    lon, lat : float
+        Point at the centre of the requested region (°).
+    edge_size : int
+        Side length of the square region in Sentinel-2 pixels (10 m each).
+    start, end : str
+        ISO-8601 dates delimiting the period, e.g. ``"2024-06-01"``.
 
     Returns
     -------
     pandas.DataFrame
-        Columns: **day** (str), **cloudPct** (float), **images** (str
-        concatenation of asset IDs separated by ``-``). No filtering applied.
+        One row per image with columns:
+        * ``id`` Sentinel-2 ID
+        * ``cs_cdf`` – Cloud Score Plus CDF (0–1)
+        * ``date`` – acquisition date (YYYY-MM-DD)
+        * ``high_null_flag`` – 1 if cloud score missing
+
+    Notes
+    -----
+    Missing ``cs_cdf`` values are filled with the mean of the same day.
     """
 
     center = ee.Geometry.Point([lon, lat])
@@ -64,6 +69,8 @@ def _cloud_table_single_range(
         .linkCollection(csp, ["cs_cdf"])
         .select(["cs_cdf"])
     )
+
+    # image IDs for every expected date
     ids = ic.aggregate_array("system:index").getInfo()
     df_ids = pd.DataFrame({"id": ids})
 
@@ -93,6 +100,7 @@ def _cloud_table_single_range(
         .drop(columns=["longitude", "latitude", "time"])
     )
 
+    # fill missing scores with daily mean
     df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
 
     return df
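
The added comment documents the gap-filling rule: a missing cs_cdf is replaced by the mean score of the other images acquired on the same date. A standalone pandas sketch of that pattern, on toy data rather than package output:

    import pandas as pd

    df = pd.DataFrame({
        "date":   ["2024-06-01", "2024-06-01", "2024-06-02"],
        "cs_cdf": [0.9, None, 0.4],
    })
    # fill each missing score with the mean of its own date group
    df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
    print(df)  # the missing 2024-06-01 value becomes 0.9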

@@ -101,11 +109,12 @@ def _cloud_table_single_range(
 def s2_cloud_table(
     lon: float,
     lat: float,
-    edge_size: int = 2048,
-    start: str = "2017-01-01",
-    end: str = "2024-12-31",
-    cscore: float = 0.5,
-    cache: bool = True,
+    edge_size: int,
+    start: str,
+    end: str,
+    max_cscore: float = 1.0,
+    min_cscore: float = 0.0,
+    cache: bool = False,
     verbose: bool = True,
 ) -> pd.DataFrame:
     """Build (and cache) a per-day cloud-table for the requested ROI.
@@ -206,7 +215,7 @@ def s2_cloud_table(
 
     result = (
         df_full.query("@start <= date <= @end")
-        .query("cs_cdf > @cscore")
+        .query("@min_cscore <= cs_cdf <= @max_cscore")
         .reset_index(drop=True)
     )
 
@@ -221,4 +230,4 @@ def s2_cloud_table(
             "collection": collection
         }
     )
-    return result
+    return result
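
In 0.1.9 the edge_size, start and end arguments lose their defaults, and the single cscore threshold becomes a [min_cscore, max_cscore] band. A rough usage sketch with placeholder coordinates and dates, assuming Earth Engine is already authenticated:

    import ee
    import cubexpress

    ee.Initialize()

    df = cubexpress.s2_cloud_table(
        lon=-76.9,            # placeholder site
        lat=-12.0,
        edge_size=512,        # now required (0.1.8 defaulted to 2048)
        start="2024-01-01",   # now required
        end="2024-06-30",
        min_cscore=0.6,       # keep images at least this clear...
        max_cscore=1.0,       # ...and no higher than this Cloud Score Plus CDF
    )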

cubexpress/cube.py

@@ -16,17 +16,22 @@ from __future__ import annotations
 import pathlib
 import concurrent.futures
 from typing import Dict, Any
-
 import ee
+
+
 from cubexpress.downloader import download_manifest, download_manifests
 from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
-from cubexpress.geotyping import RequestSet
+from cubexpress.request import table_to_requestset
+import pandas as pd
 
 
 def get_geotiff(
     manifest: Dict[str, Any],
     full_outname: pathlib.Path | str,
+    join: bool = True,
+    eraser: bool = True,
     nworks: int = 4,
+    verbose: bool = True,
 ) -> None:
     """Download *manifest* to *full_outname*, retrying with tiled requests.
 
@@ -39,19 +44,26 @@ def get_geotiff(
     nworks
         Maximum worker threads when the image must be split; default **4**.
     """
+    full_outname = pathlib.Path(full_outname)
     try:
-        download_manifest(manifest, pathlib.Path(full_outname))
+        download_manifest(manifest, full_outname)
     except ee.ee_exception.EEException as err:
-        # Handle EE “too many pixels” error by recursive tiling.
+
        size = manifest["grid"]["dimensions"]["width"]  # square images assumed
        cell_w, cell_h, power = calculate_cell_size(str(err), size)
        tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
-        download_manifests(tiled, max_workers=nworks, full_outname=pathlib.Path(full_outname))
+        download_manifests(tiled, full_outname, join, eraser, nworks)
+
+    if verbose:
+        print(f"Downloaded {full_outname}")
 
 
 def get_cube(
-    requests: RequestSet,
+    table: pd.DataFrame,
     outfolder: pathlib.Path | str,
+    join: bool = True,
+    eraser: bool = True,
+    mosaic: bool = True,
     nworks: int = 4,
 ) -> None:
     """Download every request in *requests* to *outfolder* using a thread pool.
@@ -68,14 +80,18 @@ def get_cube(
     nworks
         Pool size for concurrent downloads; default **4**.
     """
-    out = pathlib.Path(outfolder)
+
+    requests = table_to_requestset(
+        table=table,
+        mosaic=mosaic
+    )
 
     with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
         futures = []
         for _, row in requests._dataframe.iterrows():
-            outname = out / f"{row.id}.tif"
+            outname = pathlib.Path(outfolder) / f"{row.id}.tif"
             outname.parent.mkdir(parents=True, exist_ok=True)
-            futures.append(pool.submit(get_geotiff, row.manifest, outname, nworks))
+            futures.append(pool.submit(get_geotiff, row.manifest, outname, join, eraser, nworks))
 
         for fut in concurrent.futures.as_completed(futures):
             try:
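
get_cube no longer takes a prebuilt RequestSet; it now accepts the cloud table and calls table_to_requestset itself. A hedged continuation of the sketch above, with a placeholder output folder:

    cubexpress.get_cube(
        table=df,          # output of s2_cloud_table above
        outfolder="cube",  # one GeoTIFF per request is written here
        mosaic=True,       # one mosaicked image per acquisition date
        join=True,         # merge tiled fallback downloads into a single file
        eraser=True,       # remove the temporary tile folder after merging
        nworks=4,
    )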

cubexpress/downloader.py

@@ -21,7 +21,10 @@ import ee
 import rasterio as rio
 from rasterio.io import MemoryFile
 import logging
+from rasterio.merge import merge
+from rasterio.enums import Resampling
 import os
+import shutil
 
 os.environ['CPL_LOG_ERRORS'] = 'OFF'
 logging.getLogger('rasterio._env').setLevel(logging.ERROR)
@@ -53,7 +56,7 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
         blockxsize=256,
         blockysize=256,
         compress="ZSTD",
-        zstd_level=13,
+        # zstd_level=13,
         predictor=2,
         num_threads=20,
         nodata=65535,
@@ -65,13 +68,12 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
         with rio.open(full_outname, "w", **profile) as dst:
             dst.write(src.read())
 
-    print(f"{full_outname} downloaded successfully.")  # noqa: T201
-
-
 def download_manifests(
     manifests: List[Dict[str, Any]],
-    max_workers: int,
     full_outname: pathlib.Path,
+    join: bool = True,
+    eraser: bool = True,
+    max_workers: int = 4,
 ) -> None:
     """Download every manifest in *manifests* concurrently.
 
@@ -93,3 +95,41 @@ def download_manifests(
                 fut.result()
             except Exception as exc:  # noqa: BLE001
                 print(f"Error en una de las descargas: {exc}")  # noqa: T201
+
+    if join:
+
+        dir_path = full_outname.parent / full_outname.stem
+        input_files = sorted(dir_path.glob("*.tif"))
+
+        if dir_path.exists() and len(input_files) > 1:
+
+            with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
+                srcs = [rio.open(fp) for fp in input_files]
+                mosaic, out_transform = merge(
+                    srcs,
+                    nodata=65535,
+                    resampling=Resampling.nearest
+                )
+
+                meta = srcs[0].profile.copy()
+                meta["transform"] = out_transform
+                meta.update(
+                    height=mosaic.shape[1],
+                    width=mosaic.shape[2]
+                )
+
+                with rio.open(full_outname, "w", **meta) as dst:
+                    dst.write(mosaic)
+
+                for src in srcs:
+                    src.close()
+
+            if eraser:
+                # Delete a folder with pathlib
+                shutil.rmtree(dir_path)
+
+            print("✅ Mosaico generado:", full_outname)
+            return full_outname
+
+    else:
+        return full_outname
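
The new join step assumes the tiled fallback downloads for <name>.tif are written into a sibling folder <name>/; it merges every *.tif found there into one GeoTIFF and, with eraser enabled, removes the folder. A self-contained sketch of that rasterio merge pattern, with placeholder paths:

    import pathlib
    import shutil
    import rasterio as rio
    from rasterio.merge import merge

    out = pathlib.Path("scene.tif")                         # final mosaic
    tiles = sorted((out.parent / out.stem).glob("*.tif"))   # tiles in ./scene/

    srcs = [rio.open(fp) for fp in tiles]
    mosaic, transform = merge(srcs, nodata=65535)           # merge tiles into one array

    meta = srcs[0].profile.copy()
    meta.update(transform=transform, height=mosaic.shape[1], width=mosaic.shape[2])
    with rio.open(out, "w", **meta) as dst:
        dst.write(mosaic)

    for src in srcs:
        src.close()
    shutil.rmtree(out.parent / out.stem)                    # what eraser=True does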

cubexpress/geotyping.py

@@ -482,7 +482,7 @@ class RequestSet(BaseModel):
             str: A string representation of the entire RasterTransformSet.
         """
         num_entries = len(self.requestset)
-        return f"RasterTransformSet({num_entries} entries)"
+        return f"RequestSet({num_entries} entries)"
 
     def __str__(self):
         return super().__repr__()

cubexpress/request.py

@@ -5,13 +5,15 @@ from __future__ import annotations
 import ee
 import pandas as pd
 import pygeohash as pgh
-from typing import List
 
 from cubexpress.geotyping import Request, RequestSet
 from cubexpress.conversion import lonlat2rt
 
 
-def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
+def table_to_requestset(
+    table: pd.DataFrame,
+    mosaic: bool = True
+) -> RequestSet:
     """Return a :class:`RequestSet` built from *df* (cloud_table result).
 
     Parameters
@@ -31,30 +33,30 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
     """
 
 
-    df_ = df.copy()
+    df = table.copy()
 
-    if df_.empty:
+    if df.empty:
         raise ValueError("cloud_table returned no rows; nothing to request.")
 
     rt = lonlat2rt(
-        lon=df_.attrs["lon"],
-        lat=df_.attrs["lat"],
-        edge_size=df_.attrs["edge_size"],
-        scale=df_.attrs["scale"],
+        lon=df.attrs["lon"],
+        lat=df.attrs["lat"],
+        edge_size=df.attrs["edge_size"],
+        scale=df.attrs["scale"],
     )
-    centre_hash = pgh.encode(df_.attrs["lat"], df_.attrs["lon"], precision=5)
+    centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
     reqs: list[Request] = []
 
     if mosaic:
         # group all asset IDs per day
         grouped = (
-            df_.groupby("date")["id"]  # Series con listas de ids por día
+            df.groupby("date")["id"]  # Series con listas de ids por día
             .apply(list)
         )
 
         for day, img_ids in grouped.items():
             ee_img = ee.ImageCollection(
-                [ee.Image(f"{df_.attrs['collection']}/{img}") for img in img_ids]
+                [ee.Image(f"{df.attrs['collection']}/{img}") for img in img_ids]
             ).mosaic()
 
             reqs.append(
@@ -62,11 +64,11 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
                     id=f"{day}_{centre_hash}",
                     raster_transform=rt,
                     image=ee_img,
-                    bands=df_.attrs["bands"],
+                    bands=df.attrs["bands"],
                 )
             )
     else:  # one request per asset
-        for _, row in df_.iterrows():
+        for _, row in df.iterrows():
            img_id = row["id"]
            day = row["date"]
 
@@ -74,8 +76,8 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
                Request(
                    id=f"{day}_{centre_hash}_{img_id}",
                    raster_transform=rt,
-                    image=f"{df_.attrs['collection']}/{img_id}",
-                    bands=df_.attrs["bands"],
+                    image=f"{df.attrs['collection']}/{img_id}",
+                    bands=df.attrs["bands"],
                )
            )
 
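
table_to_requestset now takes the DataFrame as an ordinary first argument named table (the keyword-only df form is gone), with mosaic still available. A short sketch reusing the df built in the s2_cloud_table example above:

    from cubexpress import table_to_requestset

    requests = table_to_requestset(df, mosaic=False)  # one request per Sentinel-2 asset
    print(requests)  # repr now reads e.g. "RequestSet(42 entries)"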

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cubexpress"
-version = "0.1.8"
+version = "0.1.9"
 description = "Efficient processing of cubic Earth-observation (EO) data."
 authors = [
     "Julio Contreras <contrerasnetk@gmail.com>",

2 files without changes.