cubexpress 0.1.10__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -1,9 +1,8 @@
  Metadata-Version: 2.1
  Name: cubexpress
- Version: 0.1.10
+ Version: 0.1.12
  Summary: Efficient processing of cubic Earth-observation (EO) data.
  Home-page: https://github.com/andesdatacube/cubexpress
- License: MIT
  Keywords: earth-engine,sentinel-2,geospatial,eo,cube
  Author: Julio Contreras
  Author-email: contrerasnetk@gmail.com
@@ -32,7 +31,7 @@ Description-Content-Type: text/markdown
  <h1></h1>

  <p align="center">
-   <img src="./docs/logo_cubexpress.png" width="39%">
+   <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
  </p>

  <p align="center">
@@ -1,7 +1,7 @@
  <h1></h1>

  <p align="center">
-   <img src="./docs/logo_cubexpress.png" width="39%">
+   <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
  </p>

  <p align="center">
@@ -1,6 +1,6 @@
  from cubexpress.conversion import lonlat2rt, geo2utm
- from cubexpress.geotyping import RasterTransform, Request, RequestSet
- from cubexpress.cloud_utils import s2_cloud_table
+ from cubexpress.geotyping import RasterTransform, Request, RequestSet, GeotransformDict
+ from cubexpress.cloud_utils import s2_table
  from cubexpress.cube import get_cube
  from cubexpress.request import table_to_requestset

@@ -11,15 +11,16 @@ from cubexpress.request import table_to_requestset
  __all__ = [
      "lonlat2rt",
      "RasterTransform",
+     "GeotransformDict",
      "Request",
      "RequestSet",
      "geo2utm",
      "get_cube",
-     "s2_cloud_table",
+     "s2_table",
      "table_to_requestset"
  ]

- # Dynamic version import
- import importlib.metadata
+ # # Dynamic version import
+ # import importlib.metadata

- __version__ = importlib.metadata.version("cubexpress")
+ # __version__ = importlib.metadata.version("cubexpress")
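The hunks above rename the public `s2_cloud_table` to `s2_table`, export the new `GeotransformDict` type, and comment out the dynamic version lookup. A minimal migration sketch, using only names taken from the diff:

```python
# Imports against 0.1.12 (the 0.1.10 names no longer exist).
from cubexpress import s2_table          # was: s2_cloud_table
from cubexpress import GeotransformDict  # newly exported in 0.1.12

# 0.1.12 comments out the importlib.metadata lookup, so
# cubexpress.__version__ is no longer defined; query the package instead:
import importlib.metadata
version = importlib.metadata.version("cubexpress")
```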
@@ -15,9 +15,11 @@ from __future__ import annotations
  import datetime as dt
  import ee
  import pandas as pd
-
  from cubexpress.cache import _cache_key
+ import datetime as dt
  from cubexpress.geospatial import _square_roi
+ import warnings
+ warnings.filterwarnings('ignore', category=DeprecationWarning)


  def _cloud_table_single_range(
@@ -55,58 +57,64 @@ def _cloud_table_single_range(
 
      center = ee.Geometry.Point([lon, lat])
      roi = _square_roi(lon, lat, edge_size, 10)
-
+
      s2 = (
-         ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
+         ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
          .filterBounds(roi)
          .filterDate(start, end)
      )
-
-     csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
-
      ic = (
          s2
-         .linkCollection(csp, ["cs_cdf"])
+         .linkCollection(
+             ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED"),
+             ["cs_cdf"]
+         )
          .select(["cs_cdf"])
      )
-
-     # image IDs for every expected date
-     ids = ic.aggregate_array("system:index").getInfo()
-     df_ids = pd.DataFrame({"id": ids})
-
-
-     region_scale = edge_size * 10 / 2
-
-
+     ids_inside = (
+         ic
+         .map(
+             lambda img: img.set(
+                 'roi_inside_scene',
+                 img.geometry().contains(roi, maxError=10)
+             )
+         )
+         .filter(ee.Filter.eq('roi_inside_scene', True))
+         .aggregate_array('system:index')
+         .getInfo()
+     )
+
      try:
-         raw = ic.getRegion(geometry=center, scale=region_scale).getInfo()
+         raw = ic.getRegion(
+             geometry=center,
+             scale=(edge_size) * 11
+         ).getInfo()
      except ee.ee_exception.EEException as e:
          if "No bands in collection" in str(e):
              return pd.DataFrame(
-                 columns=["id", "cs_cdf", "date", "null_flag"]
+                 columns=["id", "longitude", "latitude", "time", "cs_cdf", "inside"]
              )
-         raise
-
-     df_raw = pd.DataFrame(raw[1:], columns=raw[0])
-
-
-     df = (
-         df_ids
-         .merge(df_raw, on="id", how="left")
+         raise e
+
+     df_raw = (
+         pd.DataFrame(raw[1:], columns=raw[0])
+         .drop(columns=["longitude", "latitude"])
          .assign(
-             date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d"),
-             null_flag=lambda d: d["cs_cdf"].isna().astype(int),
+             date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d")
          )
-         .drop(columns=["longitude", "latitude", "time"])
      )
-
-     # fill missing scores with daily mean
-     df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
-
-     return df
-
-
- def s2_cloud_table(
+     df_raw["inside"] = df_raw["id"].isin(set(ids_inside)).astype(int)
+     df_raw['cs_cdf'] = df_raw.groupby('date').apply(
+         lambda group: group['cs_cdf'].transform(
+             lambda _: group[group['inside'] == 1]['cs_cdf'].iloc[0]
+             if (group['inside'] == 1).any()
+             else group['cs_cdf'].mean()
+         )
+     ).reset_index(drop=True)
+
+     return df_raw
+
+ def s2_table(
      lon: float,
      lat: float,
      edge_size: int,
@@ -114,8 +122,7 @@ def s2_cloud_table(
      end: str,
      max_cscore: float = 1.0,
      min_cscore: float = 0.0,
-     cache: bool = False,
-     verbose: bool = True,
+     cache: bool = False
  ) -> pd.DataFrame:
      """Build (and cache) a per-day cloud-table for the requested ROI.

@@ -144,9 +151,7 @@ def s2_cloud_table(
          Downstream path hint stored in ``result.attrs``; not used internally.
      cache
          Toggle parquet caching.
-     verbose
-         If *True* prints cache info/progress.
-
+
      Returns
      -------
      pandas.DataFrame
@@ -158,10 +163,9 @@ def s2_cloud_table(
      scale = 10
      cache_file = _cache_key(lon, lat, edge_size, scale, collection)

-     # ─── 1. Load cached data if present ────────────────────────────────────
+     # Load cached data if present
      if cache and cache_file.exists():
-         if verbose:
-             print("📂 Loading cached metadata …")
+         print("📂 Loading cached metadata …")
          df_cached = pd.read_parquet(cache_file)
          have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()

@@ -172,8 +176,7 @@ def s2_cloud_table(
              dt.date.fromisoformat(start) >= cached_start
              and dt.date.fromisoformat(end) <= cached_end
          ):
-             if verbose:
-                 print("✅ Served entirely from metadata.")
+             print("✅ Served entirely from metadata.")
              df_full = df_cached
          else:
              # Identify missing segments and fetch only those.
@@ -182,14 +185,22 @@ def s2_cloud_table(
                  a1, b1 = start, cached_start.isoformat()
                  df_new_parts.append(
                      _cloud_table_single_range(
-                         lon, lat, edge_size, a1, b1
+                         lon=lon,
+                         lat=lat,
+                         edge_size=edge_size,
+                         start=a1,
+                         end=b1
                      )
                  )
              if dt.date.fromisoformat(end) > cached_end:
                  a2, b2 = cached_end.isoformat(), end
                  df_new_parts.append(
                      _cloud_table_single_range(
-                         lon, lat, edge_size, a2, b2
+                         lon=lon,
+                         lat=lat,
+                         edge_size=edge_size,
+                         start=a2,
+                         end=b2
                      )
                  )
              df_new_parts = [df for df in df_new_parts if not df.empty]
@@ -204,21 +215,20 @@ def s2_cloud_table(
          else:
              df_full = df_cached
      else:
-
-         if verbose:
-             msg = "Generating metadata (no cache found)…" if cache else "Generating metadata…"
-             print("⏳", msg)
+         print("⏳ Generating metadata…")
          df_full = _cloud_table_single_range(
-             lon, lat, edge_size, start, end
+             lon=lon,
+             lat=lat,
+             edge_size=edge_size,
+             start=start,
+             end=end
          )
-

-     # ─── 2. Save cache ─────────────────────────────────────────────────────
+     # Save cache
      if cache:
          df_full.to_parquet(cache_file, compression="zstd")

-     # ─── 3. Filter by cloud cover and requested date window ────────────────
-
+     # Filter by cloud cover and requested date window
      result = (
          df_full.query("@start <= date <= @end")
          .query("@min_cscore <= cs_cdf <= @max_cscore")
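Taken together, the cloud_utils changes switch the source collection to `COPERNICUS/S2_SR_HARMONIZED`, replace the null-flag/daily-mean logic with an `inside` flag for scenes that fully contain the ROI, and drop the `verbose` parameter. A minimal sketch of calling the renamed `s2_table` against 0.1.12; the coordinates and dates are illustrative, and an authenticated Earth Engine session is assumed:

```python
import ee
import cubexpress

ee.Initialize()  # an authenticated Earth Engine session is assumed

# Example values; any lon/lat and ISO dates work the same way.
table = cubexpress.s2_table(
    lon=-76.5,
    lat=-9.5,
    edge_size=512,   # ROI edge in 10 m pixels
    start="2023-01-01",
    end="2023-03-01",
    cache=True,      # the verbose flag was removed in 0.1.12
)
# 0.1.12 adds an `inside` column flagging scenes that fully contain the ROI.
print(table[["id", "date", "cs_cdf", "inside"]].head())
```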
@@ -14,23 +14,23 @@ The core download/split logic lives in *cubexpress.downloader* and
  from __future__ import annotations

  import pathlib
- import concurrent.futures
+ from concurrent.futures import ThreadPoolExecutor, as_completed
  from typing import Dict, Any
  import ee
+ from tqdm import tqdm


  from cubexpress.downloader import download_manifest, download_manifests
  from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
  from cubexpress.request import table_to_requestset
  import pandas as pd
+ from cubexpress.geotyping import RequestSet


  def get_geotiff(
      manifest: Dict[str, Any],
      full_outname: pathlib.Path | str,
-     join: bool = True,
-     nworks: int = 4,
-     verbose: bool = True,
+     nworks: int = 4
  ) -> None:
      """Download *manifest* to *full_outname*, retrying with tiled requests.

@@ -43,28 +43,27 @@ def get_geotiff(
      nworks
          Maximum worker threads when the image must be split; default **4**.
      """
-     full_outname = pathlib.Path(full_outname)
+
      try:
-         download_manifest(manifest, full_outname)
+         download_manifest(
+             ulist=manifest,
+             full_outname=full_outname
+         )
      except ee.ee_exception.EEException as err:
-
-         size = manifest["grid"]["dimensions"]["width"]  # square images assumed
+         size = manifest["grid"]["dimensions"]["width"]
          cell_w, cell_h, power = calculate_cell_size(str(err), size)
          tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
-         download_manifests(tiled, full_outname, join, nworks)
-
-         if verbose:
-             print(f"Downloaded {full_outname}")
-
+
+         download_manifests(
+             manifests=tiled,
+             full_outname=full_outname,
+             max_workers=nworks
+         )

  def get_cube(
-     table: pd.DataFrame,
+     requests: pd.DataFrame | RequestSet,
      outfolder: pathlib.Path | str,
-     mosaic: bool = True,
-     join: bool = True,
-     nworks: int = 4,
-     verbose: bool = True,
-     cache: bool = True
+     nworks: int = 4
  ) -> None:
      """Download every request in *requests* to *outfolder* using a thread pool.

@@ -80,40 +79,22 @@ def get_cube(
      nworks
          Pool size for concurrent downloads; default **4**.
      """
-
-     requests = table_to_requestset(
-         table=table,
-         mosaic=mosaic
-     )

      outfolder = pathlib.Path(outfolder).expanduser().resolve()
-
-     with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
-         futures = []
-         for _, row in requests._dataframe.iterrows():
-             outname = pathlib.Path(outfolder) / f"{row.id}.tif"
-             if outname.exists() and cache:
-                 continue
-             outname.parent.mkdir(parents=True, exist_ok=True)
-             futures.append(
-                 pool.submit(
-                     get_geotiff,
-                     row.manifest,
-                     outname,
-                     join,
-                     nworks,
-                     verbose
-                 )
-             )
-
-         for fut in concurrent.futures.as_completed(futures):
+     outfolder.mkdir(parents=True, exist_ok=True)
+     dataframe = requests._dataframe if isinstance(requests, RequestSet) else requests
+
+     with ThreadPoolExecutor(max_workers=nworks) as executor:
+         futures = {
+             executor.submit(
+                 get_geotiff,
+                 manifest=row.manifest,
+                 full_outname=pathlib.Path(outfolder) / f"{row.id}.tif",
+                 nworks=nworks
+             ): row.id for _, row in dataframe.iterrows()
+         }
+         for future in tqdm(as_completed(futures), total=len(futures)):
              try:
-                 fut.result()
-             except Exception as exc:  # noqa: BLE001 – log and keep going
-                 print(f"Download error: {exc}")
-
-     download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
-     download_df["outname"] = outfolder / requests._dataframe["outname"]
-     download_df.rename(columns={"outname": "full_outname"}, inplace=True)
-
-     return download_df
+                 future.result()
+             except Exception as exc:
+                 print(f"Download error for {futures[future]}: {exc}")
@@ -0,0 +1,111 @@
+ """Low-level download helpers for Earth Engine manifests.
+
+ Only two public callables are exposed:
+
+ * :func:`download_manifest` – fetch a single manifest and write one GeoTIFF.
+ * :func:`download_manifests` – convenience wrapper to parallel-download a list
+   of manifests with a thread pool.
+
+ Both functions are fully I/O bound; no return value is expected.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import pathlib
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from copy import deepcopy
+ from typing import Any, Dict
+
+ import ee
+ import rasterio as rio
+ from rasterio.io import MemoryFile
+ import logging
+ import os
+ import shutil
+ import tempfile
+ from cubexpress.geospatial import merge_tifs
+
+ os.environ['CPL_LOG_ERRORS'] = 'OFF'
+ logging.getLogger('rasterio._env').setLevel(logging.ERROR)
+
+ def download_manifest(
+     ulist: Dict[str, Any],
+     full_outname: pathlib.Path
+ ) -> None:
+     """Download *ulist* and save it as *full_outname*.
+
+     The manifest must include either an ``assetId`` or an ``expression``
+     (serialized EE image). RasterIO is used to write a tiled, compressed
+     GeoTIFF; the function is silent apart from the final ``print``.
+     """
+     if "assetId" in ulist:
+         images_bytes = ee.data.getPixels(ulist)
+     elif "expression" in ulist:
+         ee_image = ee.deserializer.decode(json.loads(ulist["expression"]))
+         ulist_deep = deepcopy(ulist)
+         ulist_deep["expression"] = ee_image
+         images_bytes = ee.data.computePixels(ulist_deep)
+     else:
+         raise ValueError("Manifest does not contain 'assetId' or 'expression'")
+
+     with open(full_outname, "wb") as src:
+         src.write(images_bytes)
+
+     # with MemoryFile(images_bytes) as memfile:
+     #     with memfile.open() as src:
+     #         profile = src.profile
+     #         profile.update(
+     #             driver="GTiff",
+     #             tiled=True,
+     #             interleave="band",
+     #             blockxsize=256,
+     #             blockysize=256,
+     #             compress="ZSTD",
+     #             zstd_level=13,
+     #             predictor=2,
+     #             num_threads=20,
+     #             nodata=65535,
+     #             dtype="uint16",
+     #             count=12,
+     #             photometric="MINISBLACK"
+     #         )
+
+     #         with rio.open(full_outname, "w", **profile) as dst:
+     #             dst.write(src.read())
+
+ def download_manifests(
+     manifests: list[Dict[str, Any]],
+     full_outname: pathlib.Path,
+     max_workers: int,
+ ) -> None:
+     """Download every manifest in *manifests* concurrently.
+
+     Each output file is saved in the folder
+     ``full_outname.parent/full_outname.stem`` with names ``000000.tif``,
+     ``000001.tif`` … according to the list order.
+     """
+     tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="cubexpress_"))
+     full_outname_temp = tmp_dir / full_outname.stem
+     full_outname_temp.mkdir(parents=True, exist_ok=True)
+
+     with ThreadPoolExecutor(max_workers=max_workers) as exe:  # -
+         futures = {
+             exe.submit(
+                 download_manifest,
+                 ulist=umanifest,
+                 full_outname=full_outname_temp / f"{index:06d}.tif"
+             ): umanifest for index, umanifest in enumerate(manifests)
+         }
+         for future in as_completed(futures):
+             try:
+                 future.result()
+             except Exception as exc:
+                 print(f"Error in one of the downloads: {exc}")
+
+     if full_outname_temp.exists():
+         input_files = sorted(full_outname_temp.glob("*.tif"))
+         merge_tifs(input_files, full_outname)
+         shutil.rmtree(full_outname_temp)
+     else:
+         raise ValueError(f"Error in {full_outname}")
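Note that the new `download_manifests` docstring still describes the 0.1.10 layout; the code now writes numbered tiles into a `cubexpress_*` temporary directory and merges them with `merge_tifs` before cleaning up. A usage sketch; the output path is illustrative and `tiles` would come from `quadsplit_manifest` (see the geospatial hunk below):

```python
import pathlib
from cubexpress.downloader import download_manifests

# `tiles` as produced by quadsplit_manifest; the path is illustrative.
download_manifests(
    manifests=tiles,
    full_outname=pathlib.Path("./cubes/2023-01-15_6mfrw_85.tif"),
    max_workers=4,
)
# Tiles land in a cubexpress_* temp dir as 000000.tif, 000001.tif, …,
# are merged into full_outname by merge_tifs, and the temp dir is removed.
```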
@@ -0,0 +1,119 @@
+ import ee
+ import re
+ from copy import deepcopy
+ from typing import Dict
+ import pathlib
+ import rasterio as rio
+ from rasterio.merge import merge
+ from rasterio.enums import Resampling
+
+
+ def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
+     manifest_copy = deepcopy(manifest)
+
+     manifest_copy["grid"]["dimensions"]["width"] = cell_width
+     manifest_copy["grid"]["dimensions"]["height"] = cell_height
+     x = manifest_copy["grid"]["affineTransform"]["translateX"]
+     y = manifest_copy["grid"]["affineTransform"]["translateY"]
+     scale_x = manifest_copy["grid"]["affineTransform"]["scaleX"]
+     scale_y = manifest_copy["grid"]["affineTransform"]["scaleY"]
+
+     manifests = []
+
+     for columny in range(2**power):
+         for rowx in range(2**power):
+             new_x = x + (rowx * cell_width) * scale_x
+             new_y = y + (columny * cell_height) * scale_y
+             new_manifest = deepcopy(manifest_copy)
+             new_manifest["grid"]["affineTransform"]["translateX"] = new_x
+             new_manifest["grid"]["affineTransform"]["translateY"] = new_y
+             manifests.append(new_manifest)
+
+     return manifests
+
+ def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
+     match = re.findall(r'\d+', ee_error_message)
+     image_pixel = int(match[0])
+     max_pixel = int(match[1])
+
+     images = image_pixel / max_pixel
+     power = 0
+
+     while images > 1:
+         power += 1
+         images = image_pixel / (max_pixel * 4 ** power)
+
+     cell_width = size // 2 ** power
+     cell_height = size // 2 ** power
+
+     return cell_width, cell_height, power
+
+
+ def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
+     """Return a square `ee.Geometry` centred on (*lon*, *lat*)."""
+     half = edge_size * scale / 2
+     point = ee.Geometry.Point([lon, lat])
+     return point.buffer(half).bounds()
+
+ def merge_tifs(
+     input_files: list[pathlib.Path],
+     output_path: pathlib.Path,
+     *,
+     nodata: int = 65535,
+     gdal_threads: int = 8
+ ) -> None:
+     """
+     Merge a list of GeoTIFF files into a single mosaic and write it out.
+
+     Parameters
+     ----------
+     input_files : list[Path]
+         Paths to the GeoTIFF tiles to be merged.
+     output_path : Path
+         Destination path for the merged GeoTIFF.
+     nodata : int, optional
+         NoData value to assign in the mosaic (default: 65535).
+     gdal_threads : int, optional
+         Number of GDAL threads to use for reading/writing (default: 8).
+
+     Raises
+     ------
+     ValueError
+         If `input_files` is empty.
+     """
+     if not input_files:
+         raise ValueError("The input_files list is empty")
+
+     # Ensure output path is a Path object
+     output_path = pathlib.Path(output_path).expanduser().resolve()
+     output_path.parent.mkdir(parents=True, exist_ok=True)
+
+     # Set GDAL threading environment
+     with rio.Env(GDAL_NUM_THREADS=str(gdal_threads), NUM_THREADS=str(gdal_threads)):
+         # Open all source datasets
+         srcs = [rio.open(fp) for fp in input_files]
+         try:
+             # Merge sources into one mosaic
+             mosaic, out_transform = merge(
+                 srcs,
+                 nodata=nodata,
+                 resampling=Resampling.nearest
+             )
+
+             # Copy metadata from the first source and update it
+             meta = srcs[0].profile.copy()
+             meta.update({
+                 "transform": out_transform,
+                 "height": mosaic.shape[1],
+                 "width": mosaic.shape[2]
+             })
+
+             # Write the merged mosaic to disk
+             with rio.open(output_path, "w", **meta) as dst:
+                 dst.write(mosaic)
+         finally:
+             # Always close all open datasets
+             for src in srcs:
+                 src.close()
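`calculate_cell_size` only reads the first two integers out of the Earth Engine error text (requested vs. allowed pixels) and quadruples the tile count until the request fits. A worked sketch with a synthetic error message (real EE wording varies) and a hypothetical `manifest`:

```python
from cubexpress.geospatial import calculate_cell_size, quadsplit_manifest

# Synthetic message: only the first two integers matter to the parser.
err = "Total request size (268435456 pixels) exceeds the limit (50331648 pixels)"
cell_w, cell_h, power = calculate_cell_size(err, size=16384)
# 268435456 / 50331648 ≈ 5.3 → power = 2, i.e. a 4×4 split,
# so cell_w == cell_h == 16384 // 2**2 == 4096.

# `manifest` is a hypothetical pixel-grid manifest with a "grid" section;
# len(tiles) == (2**power)**2 == 16.
tiles = quadsplit_manifest(manifest, cell_w, cell_h, power)
```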
@@ -259,13 +259,8 @@ class RequestSet(BaseModel):
      def create_manifests(self) -> pd.DataFrame:
          """
          Exports the raster metadata to a pandas DataFrame.
-
          Returns:
              pd.DataFrame: A DataFrame containing the metadata for all entries.
-
-         Example:
-             >>> df = raster_transform_set.export_df()
-             >>> print(df)
          """
          # Use ProcessPoolExecutor for CPU-bound tasks to convert raster transforms to lon/lat
          with ProcessPoolExecutor(max_workers=None) as executor:
@@ -306,8 +301,8 @@ class RequestSet(BaseModel):
                      "crsCode": meta.raster_transform.crs,
                  },
              },
-             "cs_cdf": int(meta.id.split("_")[-1]) / 100,
-             "date": meta.id.split("_")[0],
+             # "cs_cdf": int(meta.id.split("_")[-1]) / 100,
+             # "date": meta.id.split("_")[0],
              "outname": f"{meta.id}.tif",
          }

@@ -11,9 +11,9 @@ from cubexpress.conversion import lonlat2rt


  def table_to_requestset(
-     table: pd.DataFrame,
-     mosaic: bool = True
- ) -> RequestSet:
+         table: pd.DataFrame,
+         mosaic: bool = True
+     ) -> RequestSet:
      """Return a :class:`RequestSet` built from *df* (cloud_table result).

      Parameters
@@ -31,12 +31,11 @@ def table_to_requestset(
          If *df* is empty after filtering.

      """
-

      df = table.copy()

      if df.empty:
-         raise ValueError("cloud_table returned no rows; nothing to request.")
+         raise ValueError("There are no images in the requested period. Please check your dates or your ubication.")

      rt = lonlat2rt(
          lon=df.attrs["lon"],
@@ -44,22 +43,30 @@ def table_to_requestset(
          edge_size=df.attrs["edge_size"],
          scale=df.attrs["scale"],
      )
+
      centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
-     reqs: list[Request] = []
-
-
+     reqs = []

      if mosaic:
          grouped = (
-             df.groupby('date')
+             df.groupby('date')
              .agg(
-                 id_list = ('id', list),
-                 cs_cdf_mean = ('cs_cdf', lambda x: int(round(x.mean(), 2) * 100))
+                 id_list = ('id', list),
+                 tiles = (
+                     'id',
+                     lambda ids: ','.join(
+                         sorted({i.split('_')[-1][1:] for i in ids})
+                     )
+                 ),
+                 cs_cdf_mean = (
+                     'cs_cdf',
+                     lambda x: int(round(x.mean(), 2) * 100)
+                 )
              )
          )

          for day, row in grouped.iterrows():
-
+
              img_ids = row["id_list"]
              cdf = row["cs_cdf_mean"]

@@ -79,10 +86,11 @@ def table_to_requestset(
                  )
              else:
                  for img_id in img_ids:
-                     tile = img_id.split("_")[-1][1:]
+                     # tile = img_id.split("_")[-1][1:]
                      reqs.append(
                          Request(
-                             id=f"{day}_{centre_hash}_{tile}_{cdf}",
+                             # id=f"{day}_{centre_hash}_{tile}_{cdf}",
+                             id=f"{day}_{centre_hash}_{cdf}",
                              raster_transform=rt,
                              image=f"{df.attrs['collection']}/{img_id}",
                              bands=df.attrs["bands"],
@@ -94,14 +102,13 @@ def table_to_requestset(
              tile = img_id.split("_")[-1][1:]
              day = row["date"]
              cdf = int(round(row["cs_cdf"], 2) * 100)
-
              reqs.append(
                  Request(
-                     id=f"{day}_{centre_hash}_{tile}_{cdf}",
+                     id=f"{day}_{tile}_{cdf}",
                      raster_transform=rt,
                      image=f"{df.attrs['collection']}/{img_id}",
                      bands=df.attrs["bands"],
                  )
              )

-     return RequestSet(requestset=reqs)
+     return RequestSet(requestset=reqs)
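The request IDs lose one token in 0.1.12: mosaicked requests drop the tile (`{date}_{geohash}_{score}`), per-scene requests drop the geohash (`{date}_{tile}_{score}`), so any code that parses the output filenames will need updating. A sketch, with illustrative values:

```python
import cubexpress

# 0.1.10 id: "2023-01-15_6mfrw_18LVN_85" (date, geohash, tile, score)
# 0.1.12 ids (values illustrative):
#   mosaic=True  → "2023-01-15_6mfrw_85"  (tile token dropped)
#   mosaic=False → "2023-01-15_18LVN_85"  (geohash token dropped)
requestset = cubexpress.table_to_requestset(table, mosaic=False)
print(requestset._dataframe["id"].head())  # _dataframe is internal API
```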
@@ -1,12 +1,11 @@
  [tool.poetry]
  name = "cubexpress"
- version = "0.1.10"
+ version = "0.1.12"
  description = "Efficient processing of cubic Earth-observation (EO) data."
  authors = [
      "Julio Contreras <contrerasnetk@gmail.com>",
-     "Cesar Aybar <csaybar@gmail.com>",
  ]
- license = "MIT"
+
  repository = "https://github.com/andesdatacube/cubexpress"
  documentation = "https://andesdatacube.github.io/cubexpress"
  readme = "README.md"
@@ -1,135 +0,0 @@
- """Low-level download helpers for Earth Engine manifests.
-
- Only two public callables are exposed:
-
- * :func:`download_manifest` – fetch a single manifest and write one GeoTIFF.
- * :func:`download_manifests` – convenience wrapper to parallel-download a list
-   of manifests with a thread pool.
-
- Both functions are fully I/O bound; no return value is expected.
- """
-
- from __future__ import annotations
-
- import json
- import pathlib
- import concurrent.futures
- from copy import deepcopy
- from typing import Any, Dict, List
-
- import ee
- import rasterio as rio
- from rasterio.io import MemoryFile
- import logging
- from rasterio.merge import merge
- from rasterio.enums import Resampling
- import os
- import shutil
- import tempfile
-
- os.environ['CPL_LOG_ERRORS'] = 'OFF'
- logging.getLogger('rasterio._env').setLevel(logging.ERROR)
-
- def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None:
-     """Download *ulist* and save it as *full_outname*.
-
-     The manifest must include either an ``assetId`` or an ``expression``
-     (serialized EE image). RasterIO is used to write a tiled, compressed
-     GeoTIFF; the function is silent apart from the final ``print``.
-     """
-     if "assetId" in ulist:
-         images_bytes = ee.data.getPixels(ulist)
-     elif "expression" in ulist:
-         ee_image = ee.deserializer.decode(json.loads(ulist["expression"]))
-         ulist_deep = deepcopy(ulist)
-         ulist_deep["expression"] = ee_image
-         images_bytes = ee.data.computePixels(ulist_deep)
-     else:  # pragma: no cover
-         raise ValueError("Manifest does not contain 'assetId' or 'expression'")
-
-     with MemoryFile(images_bytes) as memfile:
-         with memfile.open() as src:
-             profile = src.profile
-             profile.update(
-                 driver="GTiff",
-                 tiled=True,
-                 interleave="band",
-                 blockxsize=256,  # TODO: I think it's 128 (because of the super-resolution)
-                 blockysize=256,
-                 compress="ZSTD",
-                 # zstd_level=13,
-                 predictor=2,
-                 num_threads=20,
-                 nodata=65535,
-                 dtype="uint16",
-                 count=13,
-                 photometric="MINISBLACK"
-             )
-
-             with rio.open(full_outname, "w", **profile) as dst:
-                 dst.write(src.read())
-
- def download_manifests(
-     manifests: list[Dict[str, Any]],
-     full_outname: pathlib.Path,
-     join: bool = True,
-     max_workers: int = 4,
- ) -> None:
-     """Download every manifest in *manifests* concurrently.
-
-     Each output file is saved in the folder
-     ``full_outname.parent/full_outname.stem`` with names ``000000.tif``,
-     ``000001.tif`` … according to the list order.
-     """
-     # full_outname = pathlib.Path("/home/contreras/Documents/GitHub/cubexpress/cubexpress_test/2017-08-19_6mfrw_18LVN.tif")
-     original_dir = full_outname.parent
-     if join:
-         tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="s2tmp_"))
-         full_outname = tmp_dir / full_outname.name
-
-     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-         futures = []
-
-         for index, umanifest in enumerate(manifests):
-             folder = full_outname.parent / full_outname.stem
-             folder.mkdir(parents=True, exist_ok=True)
-             outname = folder / f"{index:06d}.tif"
-             futures.append(executor.submit(download_manifest, umanifest, outname))
-
-         for fut in concurrent.futures.as_completed(futures):
-             try:
-                 fut.result()
-             except Exception as exc:  # noqa: BLE001
-                 print(f"Error in one of the downloads: {exc}")  # noqa: T201
-
-     dir_path = full_outname.parent / full_outname.stem
-     input_files = sorted(dir_path.glob("*.tif"))
-
-     if dir_path.exists() and len(input_files) > 1:
-
-         with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
-             srcs = [rio.open(fp) for fp in input_files]
-             mosaic, out_transform = merge(
-                 srcs,
-                 nodata=65535,
-                 resampling=Resampling.nearest
-             )
-
-             meta = srcs[0].profile.copy()
-             meta["transform"] = out_transform
-             meta.update(
-                 height=mosaic.shape[1],
-                 width=mosaic.shape[2]
-             )
-             outname = original_dir / full_outname.name
-             outname.parent.mkdir(parents=True, exist_ok=True)
-             with rio.open(outname, "w", **meta) as dst:
-                 dst.write(mosaic)
-
-             for src in srcs:
-                 src.close()
-
-         # Delete a folder with pathlib
-         shutil.rmtree(dir_path)
-     else:
-         return outname
@@ -1,55 +0,0 @@
- import ee
- import re
- from copy import deepcopy
- from typing import Dict
-
-
- def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
-     manifest_copy = deepcopy(manifest)
-
-     manifest_copy["grid"]["dimensions"]["width"] = cell_width
-     manifest_copy["grid"]["dimensions"]["height"] = cell_height
-     x = manifest_copy["grid"]["affineTransform"]["translateX"]
-     y = manifest_copy["grid"]["affineTransform"]["translateY"]
-     scale_x = manifest_copy["grid"]["affineTransform"]["scaleX"]
-     scale_y = manifest_copy["grid"]["affineTransform"]["scaleY"]
-
-     manifests = []
-
-     for columny in range(2**power):
-         for rowx in range(2**power):
-             new_x = x + (rowx * cell_width) * scale_x
-             new_y = y + (columny * cell_height) * scale_y
-             new_manifest = deepcopy(manifest_copy)
-             new_manifest["grid"]["affineTransform"]["translateX"] = new_x
-             new_manifest["grid"]["affineTransform"]["translateY"] = new_y
-             manifests.append(new_manifest)
-
-     return manifests
-
-
- def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
-     match = re.findall(r'\d+', ee_error_message)
-     image_pixel = int(match[0])
-     max_pixel = int(match[1])
-
-     images = image_pixel / max_pixel
-     power = 0
-
-     while images > 1:
-         power += 1
-         images = image_pixel / (max_pixel * 4 ** power)
-
-     cell_width = size // 2 ** power
-     cell_height = size // 2 ** power
-
-     return cell_width, cell_height, power
-
-
- def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
-     """Return a square `ee.Geometry` centred on (*lon*, *lat*)."""
-     half = edge_size * scale / 2
-     point = ee.Geometry.Point([lon, lat])
-     return point.buffer(half).bounds()