PyPI - cubexpress - Versions diffs - 0.1.10__tar.gz → 0.1.11__tar.gz - Mend

cubexpress 0.1.10__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cubexpress might be problematic. Click here for more details.

Files changed (14) hide show

{cubexpress-0.1.10 → cubexpress-0.1.11}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,8 @@
 Metadata-Version: 2.1
 Name: cubexpress
-Version: 0.1.10
+Version: 0.1.11
 Summary: Efficient processing of cubic Earth-observation (EO) data.
 Home-page: https://github.com/andesdatacube/cubexpress
-License: MIT
 Keywords: earth-engine,sentinel-2,geospatial,eo,cube
 Author: Julio Contreras
 Author-email: contrerasnetk@gmail.com

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from cubexpress.conversion import lonlat2rt, geo2utm
-from cubexpress.geotyping import RasterTransform, Request, RequestSet
+from cubexpress.geotyping import RasterTransform, Request, RequestSet, GeotransformDict
 from cubexpress.cloud_utils import s2_cloud_table
 from cubexpress.cube import get_cube
 from cubexpress.request import table_to_requestset
@@ -11,6 +11,7 @@ from cubexpress.request import table_to_requestset
 __all__ = [
     "lonlat2rt",
     "RasterTransform",
+    "GeotransformDict",
     "Request",
     "RequestSet",
     "geo2utm",
@@ -19,7 +20,7 @@ __all__ = [
     "table_to_requestset"
 ]
-# Dynamic version import
-import importlib.metadata
+# # Dynamic version import
+# import importlib.metadata
-__version__ = importlib.metadata.version("cubexpress")
+# __version__ = importlib.metadata.version("cubexpress")

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/cloud_utils.py RENAMED Viewed

@@ -101,8 +101,11 @@ def _cloud_table_single_range(
     )
     # fill missing scores with daily mean
+    df["lon"] = lon
+    df["lat"] = lat
     df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
     return df

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/cube.py RENAMED Viewed

@@ -23,6 +23,7 @@ from cubexpress.downloader import download_manifest, download_manifests
 from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
 from cubexpress.request import table_to_requestset
 import pandas as pd
+from cubexpress.geotyping import RequestSet
 def get_geotiff(
@@ -51,14 +52,20 @@ def get_geotiff(
         size = manifest["grid"]["dimensions"]["width"]  # square images assumed
         cell_w, cell_h, power = calculate_cell_size(str(err), size)
         tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
-        download_manifests(tiled, full_outname, join, nworks)
+        download_manifests(
+            manifests = tiled,
+            full_outname = full_outname,
+            join = join,
+            max_workers = nworks
+        )
     if verbose:
         print(f"Downloaded {full_outname}")
 def get_cube(
-    table: pd.DataFrame,
+    # table: pd.DataFrame,
+    requests: pd.DataFrame | RequestSet,
     outfolder: pathlib.Path | str,
     mosaic: bool = True,
     join: bool = True,
@@ -81,10 +88,10 @@ def get_cube(
         Pool size for concurrent downloads; default **4**.
     """
-    requests = table_to_requestset(
-        table=table,
-        mosaic=mosaic
-    )
+    # requests = table_to_requestset(
+    #     table=table,
+    #     mosaic=mosaic
+    # )
     outfolder = pathlib.Path(outfolder).expanduser().resolve()
@@ -98,11 +105,11 @@ def get_cube(
             futures.append(
                 pool.submit(
                     get_geotiff,
-                    row.manifest,
-                    outname,
-                    join,
-                    nworks,
-                    verbose
+                    row.manifest, # manifest = row.manifest
+                    outname, # full_outname = outname
+                    join, # join = join
+                    nworks, # nworks = nworks
+                    verbose # verbose = verbose
                 )
             )
@@ -112,8 +119,14 @@ def get_cube(
             except Exception as exc:  # noqa: BLE001 – log and keep going
                 print(f"Download error: {exc}")
-    download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
-    download_df["outname"] = outfolder / requests._dataframe["outname"]
-    download_df.rename(columns={"outname": "full_outname"}, inplace=True)
+    # download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
+    # download_df["outname"] = outfolder / requests._dataframe["outname"]
+    # download_df.rename(columns={"outname": "full_outname"}, inplace=True)
-    return download_df
+    return
+# manifest = row.manifest
+# full_outname = outname
+# join: bool = True,
+# nworks: int = 4,
+# verbose: bool = True,

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/downloader.py RENAMED Viewed

@@ -26,6 +26,7 @@ from rasterio.enums import Resampling
 import os
 import shutil
 import tempfile
+from cubexpress.geospatial import merge_tifs
 os.environ['CPL_LOG_ERRORS'] = 'OFF'
 logging.getLogger('rasterio._env').setLevel(logging.ERROR)
@@ -54,15 +55,15 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
                 driver="GTiff",
                 tiled=True,
                 interleave="band",
-                blockxsize=256, # TODO: Creo que es 128 (por de la superresolucion)
+                blockxsize=256,
                 blockysize=256,
                 compress="ZSTD",
-                # zstd_level=13,
+                zstd_level=13,
                 predictor=2,
                 num_threads=20,
                 nodata=65535,
                 dtype="uint16",
-                count=13,
+                count=12,
                 photometric="MINISBLACK"
             )
@@ -82,19 +83,25 @@ def download_manifests(
     ``000001.tif`` … according to the list order.
     """
     # full_outname = pathlib.Path("/home/contreras/Documents/GitHub/cubexpress/cubexpress_test/2017-08-19_6mfrw_18LVN.tif")
-    original_dir = full_outname.parent
     if join:
         tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="s2tmp_"))
-        full_outname = tmp_dir / full_outname.name
+        full_outname_temp = tmp_dir / full_outname.name
     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
         futures = []
         for index, umanifest in enumerate(manifests):
-            folder = full_outname.parent / full_outname.stem
+            folder = full_outname_temp.parent / full_outname_temp.stem
             folder.mkdir(parents=True, exist_ok=True)
             outname = folder / f"{index:06d}.tif"
-            futures.append(executor.submit(download_manifest, umanifest, outname))
+            futures.append(
+                executor.submit(
+                    download_manifest,
+                    umanifest, # ulist = umanifest
+                    outname # full_outname = outname
+                )
+         )
         for fut in concurrent.futures.as_completed(futures):
             try:
@@ -102,34 +109,11 @@ def download_manifests(
             except Exception as exc:  # noqa: BLE001
                 print(f"Error en una de las descargas: {exc}")  # noqa: T201
-    dir_path = full_outname.parent / full_outname.stem
-    input_files = sorted(dir_path.glob("*.tif"))
-    if dir_path.exists() and len(input_files) > 1:
-        with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
-            srcs = [rio.open(fp) for fp in input_files]
-            mosaic, out_transform = merge(
-                srcs,
-                nodata=65535,
-                resampling=Resampling.nearest
-            )
-            meta = srcs[0].profile.copy()
-            meta["transform"] = out_transform
-            meta.update(
-                height=mosaic.shape[1],
-                width=mosaic.shape[2]
-            )
-            outname = original_dir / full_outname.name
-            outname.parent.mkdir(parents=True, exist_ok=True)
-            with rio.open(outname, "w", **meta) as dst:
-                dst.write(mosaic)
-            for src in srcs:
-                src.close()
-        # Delete a folder with pathlib
+    dir_path = full_outname_temp.parent / full_outname_temp.stem
+    if dir_path.exists():
+        input_files = sorted(dir_path.glob("*.tif"))
+        merge_tifs(input_files, full_outname)
         shutil.rmtree(dir_path)
     else:
-        return outname
+        raise ValueError(f"Error in {full_outname}")

cubexpress-0.1.11/cubexpress/geospatial.py ADDED Viewed

@@ -0,0 +1,121 @@
+import ee
+import re
+from copy import deepcopy
+from typing import Dict
+import pathlib
+import rasterio as rio
+from rasterio.merge import merge
+from rasterio.enums import Resampling
+def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
+    manifest_copy = deepcopy(manifest)
+    manifest_copy["grid"]["dimensions"]["width"] = cell_width
+    manifest_copy["grid"]["dimensions"]["height"] = cell_height
+    x = manifest_copy["grid"]["affineTransform"]["translateX"]
+    y = manifest_copy["grid"]["affineTransform"]["translateY"]
+    scale_x = manifest_copy["grid"]["affineTransform"]["scaleX"]
+    scale_y = manifest_copy["grid"]["affineTransform"]["scaleY"]
+    manifests = []
+    for columny in range(2**power):
+        for rowx in range(2**power):
+            new_x = x + (rowx * cell_width) * scale_x
+            new_y = y + (columny * cell_height) * scale_y
+            new_manifest = deepcopy(manifest_copy)
+            new_manifest["grid"]["affineTransform"]["translateX"] = new_x
+            new_manifest["grid"]["affineTransform"]["translateY"] = new_y
+            manifests.append(new_manifest)
+    return manifests
+def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
+    match = re.findall(r'\d+', ee_error_message)
+    image_pixel = int(match[0])
+    max_pixel = int(match[1])
+    images = image_pixel / max_pixel
+    power = 0
+    while images > 1:
+        power += 1
+        images = image_pixel / (max_pixel * 4 ** power)
+    cell_width = size // 2 ** power
+    cell_height = size // 2 ** power
+    return cell_width, cell_height, power
+def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
+    """Return a square `ee.Geometry` centred on (*lon*, *lat*)."""
+    half = edge_size * scale / 2
+    point = ee.Geometry.Point([lon, lat])
+    return point.buffer(half).bounds()
+def merge_tifs(
+    input_files: list[pathlib.Path],
+    output_path: pathlib.Path,
+    *,
+    nodata: int = 65535,
+    gdal_threads: int = 8
+) -> None:
+    """
+    Merge a list of GeoTIFF files into a single mosaic and write it out.
+    Parameters
+    ----------
+    input_files : list[Path]
+        Paths to the GeoTIFF tiles to be merged.
+    output_path : Path
+        Destination path for the merged GeoTIFF.
+    nodata : int, optional
+        NoData value to assign in the mosaic (default: 65535).
+    gdal_threads : int, optional
+        Number of GDAL threads to use for reading/writing (default: 8).
+    Raises
+    ------
+    ValueError
+        If `input_files` is empty.
+    """
+    if not input_files:
+        raise ValueError("The input_files list is empty")
+    # Ensure output path is a Path object
+    output_path = pathlib.Path(output_path).expanduser().resolve()
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    # Set GDAL threading environment
+    with rio.Env(GDAL_NUM_THREADS=str(gdal_threads), NUM_THREADS=str(gdal_threads)):
+        # Open all source datasets
+        srcs = [rio.open(fp) for fp in input_files]
+        try:
+            # Merge sources into one mosaic
+            mosaic, out_transform = merge(
+                srcs,
+                nodata=nodata,
+                resampling=Resampling.nearest
+            )
+            # Copy metadata from the first source and update it
+            meta = srcs[0].profile.copy()
+            meta.update({
+                "transform": out_transform,
+                "height": mosaic.shape[1],
+                "width": mosaic.shape[2]
+            })
+            # Write the merged mosaic to disk
+            with rio.open(output_path, "w", **meta) as dst:
+                dst.write(mosaic)
+        finally:
+            # Always close all open datasets
+            for src in srcs:
+                src.close()

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/geotyping.py RENAMED Viewed

@@ -306,8 +306,8 @@ class RequestSet(BaseModel):
                             "crsCode": meta.raster_transform.crs,
                         },
                     },
-                    "cs_cdf": int(meta.id.split("_")[-1]) / 100,
-                    "date": meta.id.split("_")[0],
+                    # "cs_cdf": int(meta.id.split("_")[-1]) / 100,
+                    # "date": meta.id.split("_")[0],
                     "outname": f"{meta.id}.tif",
                 }

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/request.py RENAMED Viewed

@@ -31,7 +31,6 @@ def table_to_requestset(
         If *df* is empty after filtering.
     """
     df = table.copy()
@@ -47,19 +46,28 @@ def table_to_requestset(
     centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
     reqs: list[Request] = []
     if mosaic:
         grouped = (
-        df.groupby('date')
+            df.groupby('date')
             .agg(
-                id_list      = ('id', list),
-                cs_cdf_mean  = ('cs_cdf', lambda x: int(round(x.mean(), 2) * 100))
+                id_list     = ('id', list),
+                tiles       = (
+                    'id',
+                    lambda ids: ','.join(
+                        sorted({i.split('_')[-1][1:] for i in ids})
+                    )
+                ),
+                cs_cdf_mean = (
+                    'cs_cdf',
+                    lambda x: int(round(x.mean(), 2) * 100)
+                )
             )
         )
         for day, row in grouped.iterrows():
             img_ids   = row["id_list"]
             cdf  = row["cs_cdf_mean"]
@@ -79,10 +87,11 @@ def table_to_requestset(
                 )
             else:
                 for img_id in img_ids:
-                    tile = img_id.split("_")[-1][1:]
+                    # tile = img_id.split("_")[-1][1:]
                     reqs.append(
                         Request(
-                            id=f"{day}_{centre_hash}_{tile}_{cdf}",
+                            # id=f"{day}_{centre_hash}_{tile}_{cdf}",
+                            id=f"{day}_{centre_hash}_{cdf}",
                             raster_transform=rt,
                             image=f"{df.attrs['collection']}/{img_id}",
                             bands=df.attrs["bands"],
@@ -91,13 +100,12 @@ def table_to_requestset(
     else:
         for _, row in df.iterrows():
             img_id = row["id"]
-            tile = img_id.split("_")[-1][1:]
+            # tile = img_id.split("_")[-1][1:]
             day = row["date"]
             cdf = int(round(row["cs_cdf"], 2) * 100)
             reqs.append(
                 Request(
-                    id=f"{day}_{centre_hash}_{tile}_{cdf}",
+                    id=f"{day}_{centre_hash}_{cdf}",
                     raster_transform=rt,
                     image=f"{df.attrs['collection']}/{img_id}",
                     bands=df.attrs["bands"],

{cubexpress-0.1.10 → cubexpress-0.1.11}/pyproject.toml RENAMED Viewed

@@ -1,12 +1,11 @@
 [tool.poetry]
 name        = "cubexpress"
-version     = "0.1.10"
+version     = "0.1.11"
 description = "Efficient processing of cubic Earth-observation (EO) data."
 authors     = [
   "Julio Contreras <contrerasnetk@gmail.com>",
-  "Cesar Aybar     <csaybar@gmail.com>",
 ]
-license      = "MIT"
 repository   = "https://github.com/andesdatacube/cubexpress"
 documentation = "https://andesdatacube.github.io/cubexpress"
 readme       = "README.md"

cubexpress-0.1.10/cubexpress/geospatial.py DELETED Viewed

@@ -1,55 +0,0 @@
-import ee
-import re
-from copy import deepcopy
-from typing import Dict
-def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
-    manifest_copy = deepcopy(manifest)
-    manifest_copy["grid"]["dimensions"]["width"] = cell_width
-    manifest_copy["grid"]["dimensions"]["height"] = cell_height
-    x = manifest_copy["grid"]["affineTransform"]["translateX"]
-    y = manifest_copy["grid"]["affineTransform"]["translateY"]
-    scale_x = manifest_copy["grid"]["affineTransform"]["scaleX"]
-    scale_y = manifest_copy["grid"]["affineTransform"]["scaleY"]
-    manifests = []
-    for columny in range(2**power):
-        for rowx in range(2**power):
-            new_x = x + (rowx * cell_width) * scale_x
-            new_y = y + (columny * cell_height) * scale_y
-            new_manifest = deepcopy(manifest_copy)
-            new_manifest["grid"]["affineTransform"]["translateX"] = new_x
-            new_manifest["grid"]["affineTransform"]["translateY"] = new_y
-            manifests.append(new_manifest)
-    return manifests
-def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
-    match = re.findall(r'\d+', ee_error_message)
-    image_pixel = int(match[0])
-    max_pixel = int(match[1])
-    images = image_pixel / max_pixel
-    power = 0
-    while images > 1:
-        power += 1
-        images = image_pixel / (max_pixel * 4 ** power)
-    cell_width = size // 2 ** power
-    cell_height = size // 2 ** power
-    return cell_width, cell_height, power
-def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
-    """Return a square `ee.Geometry` centred on (*lon*, *lat*)."""
-    half = edge_size * scale / 2
-    point = ee.Geometry.Point([lon, lat])
-    return point.buffer(half).bounds()

{cubexpress-0.1.10 → cubexpress-0.1.11}/LICENSE RENAMED Viewed

File without changes

{cubexpress-0.1.10 → cubexpress-0.1.11}/README.md RENAMED Viewed

File without changes

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/cache.py RENAMED Viewed

File without changes

{cubexpress-0.1.10 → cubexpress-0.1.11}/cubexpress/conversion.py RENAMED Viewed

File without changes

cubexpress 0.1.10__tar.gz → 0.1.11__tar.gz

Potentially problematic release.

cubexpress 0.1.10__tar.gz → 0.1.11__tar.gz