cubexpress 0.1.10__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cubexpress might be problematic. Click here for more details.

@@ -1,9 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cubexpress
3
- Version: 0.1.10
3
+ Version: 0.1.11
4
4
  Summary: Efficient processing of cubic Earth-observation (EO) data.
5
5
  Home-page: https://github.com/andesdatacube/cubexpress
6
- License: MIT
7
6
  Keywords: earth-engine,sentinel-2,geospatial,eo,cube
8
7
  Author: Julio Contreras
9
8
  Author-email: contrerasnetk@gmail.com
@@ -1,5 +1,5 @@
1
1
  from cubexpress.conversion import lonlat2rt, geo2utm
2
- from cubexpress.geotyping import RasterTransform, Request, RequestSet
2
+ from cubexpress.geotyping import RasterTransform, Request, RequestSet, GeotransformDict
3
3
  from cubexpress.cloud_utils import s2_cloud_table
4
4
  from cubexpress.cube import get_cube
5
5
  from cubexpress.request import table_to_requestset
@@ -11,6 +11,7 @@ from cubexpress.request import table_to_requestset
11
11
  __all__ = [
12
12
  "lonlat2rt",
13
13
  "RasterTransform",
14
+ "GeotransformDict",
14
15
  "Request",
15
16
  "RequestSet",
16
17
  "geo2utm",
@@ -19,7 +20,7 @@ __all__ = [
19
20
  "table_to_requestset"
20
21
  ]
21
22
 
22
- # Dynamic version import
23
- import importlib.metadata
23
+ # # Dynamic version import
24
+ # import importlib.metadata
24
25
 
25
- __version__ = importlib.metadata.version("cubexpress")
26
+ # __version__ = importlib.metadata.version("cubexpress")
@@ -101,8 +101,11 @@ def _cloud_table_single_range(
101
101
  )
102
102
 
103
103
  # fill missing scores with daily mean
104
+ df["lon"] = lon
105
+ df["lat"] = lat
104
106
  df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
105
107
 
108
+
106
109
  return df
107
110
 
108
111
 
@@ -23,6 +23,7 @@ from cubexpress.downloader import download_manifest, download_manifests
23
23
  from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
24
24
  from cubexpress.request import table_to_requestset
25
25
  import pandas as pd
26
+ from cubexpress.geotyping import RequestSet
26
27
 
27
28
 
28
29
  def get_geotiff(
@@ -51,14 +52,20 @@ def get_geotiff(
51
52
  size = manifest["grid"]["dimensions"]["width"] # square images assumed
52
53
  cell_w, cell_h, power = calculate_cell_size(str(err), size)
53
54
  tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
54
- download_manifests(tiled, full_outname, join, nworks)
55
+ download_manifests(
56
+ manifests = tiled,
57
+ full_outname = full_outname,
58
+ join = join,
59
+ max_workers = nworks
60
+ )
55
61
 
56
62
  if verbose:
57
63
  print(f"Downloaded {full_outname}")
58
64
 
59
65
 
60
66
  def get_cube(
61
- table: pd.DataFrame,
67
+ # table: pd.DataFrame,
68
+ requests: pd.DataFrame | RequestSet,
62
69
  outfolder: pathlib.Path | str,
63
70
  mosaic: bool = True,
64
71
  join: bool = True,
@@ -81,10 +88,10 @@ def get_cube(
81
88
  Pool size for concurrent downloads; default **4**.
82
89
  """
83
90
 
84
- requests = table_to_requestset(
85
- table=table,
86
- mosaic=mosaic
87
- )
91
+ # requests = table_to_requestset(
92
+ # table=table,
93
+ # mosaic=mosaic
94
+ # )
88
95
 
89
96
  outfolder = pathlib.Path(outfolder).expanduser().resolve()
90
97
 
@@ -98,11 +105,11 @@ def get_cube(
98
105
  futures.append(
99
106
  pool.submit(
100
107
  get_geotiff,
101
- row.manifest,
102
- outname,
103
- join,
104
- nworks,
105
- verbose
108
+ row.manifest, # manifest = row.manifest
109
+ outname, # full_outname = outname
110
+ join, # join = join
111
+ nworks, # nworks = nworks
112
+ verbose # verbose = verbose
106
113
  )
107
114
  )
108
115
 
@@ -112,8 +119,14 @@ def get_cube(
112
119
  except Exception as exc: # noqa: BLE001 – log and keep going
113
120
  print(f"Download error: {exc}")
114
121
 
115
- download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
116
- download_df["outname"] = outfolder / requests._dataframe["outname"]
117
- download_df.rename(columns={"outname": "full_outname"}, inplace=True)
122
+ # download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
123
+ # download_df["outname"] = outfolder / requests._dataframe["outname"]
124
+ # download_df.rename(columns={"outname": "full_outname"}, inplace=True)
118
125
 
119
- return download_df
126
+ return
127
+
128
+ # manifest = row.manifest
129
+ # full_outname = outname
130
+ # join: bool = True,
131
+ # nworks: int = 4,
132
+ # verbose: bool = True,
@@ -26,6 +26,7 @@ from rasterio.enums import Resampling
26
26
  import os
27
27
  import shutil
28
28
  import tempfile
29
+ from cubexpress.geospatial import merge_tifs
29
30
 
30
31
  os.environ['CPL_LOG_ERRORS'] = 'OFF'
31
32
  logging.getLogger('rasterio._env').setLevel(logging.ERROR)
@@ -54,15 +55,15 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
54
55
  driver="GTiff",
55
56
  tiled=True,
56
57
  interleave="band",
57
- blockxsize=256, # TODO: Creo que es 128 (por de la superresolucion)
58
+ blockxsize=256,
58
59
  blockysize=256,
59
60
  compress="ZSTD",
60
- # zstd_level=13,
61
+ zstd_level=13,
61
62
  predictor=2,
62
63
  num_threads=20,
63
64
  nodata=65535,
64
65
  dtype="uint16",
65
- count=13,
66
+ count=12,
66
67
  photometric="MINISBLACK"
67
68
  )
68
69
 
@@ -82,19 +83,25 @@ def download_manifests(
82
83
  ``000001.tif`` … according to the list order.
83
84
  """
84
85
  # full_outname = pathlib.Path("/home/contreras/Documents/GitHub/cubexpress/cubexpress_test/2017-08-19_6mfrw_18LVN.tif")
85
- original_dir = full_outname.parent
86
+
86
87
  if join:
87
88
  tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="s2tmp_"))
88
- full_outname = tmp_dir / full_outname.name
89
+ full_outname_temp = tmp_dir / full_outname.name
89
90
 
90
91
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
91
92
  futures = []
92
93
 
93
94
  for index, umanifest in enumerate(manifests):
94
- folder = full_outname.parent / full_outname.stem
95
+ folder = full_outname_temp.parent / full_outname_temp.stem
95
96
  folder.mkdir(parents=True, exist_ok=True)
96
97
  outname = folder / f"{index:06d}.tif"
97
- futures.append(executor.submit(download_manifest, umanifest, outname))
98
+ futures.append(
99
+ executor.submit(
100
+ download_manifest,
101
+ umanifest, # ulist = umanifest
102
+ outname # full_outname = outname
103
+ )
104
+ )
98
105
 
99
106
  for fut in concurrent.futures.as_completed(futures):
100
107
  try:
@@ -102,34 +109,11 @@ def download_manifests(
102
109
  except Exception as exc: # noqa: BLE001
103
110
  print(f"Error en una de las descargas: {exc}") # noqa: T201
104
111
 
105
- dir_path = full_outname.parent / full_outname.stem
106
- input_files = sorted(dir_path.glob("*.tif"))
107
-
108
- if dir_path.exists() and len(input_files) > 1:
109
-
110
- with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
111
- srcs = [rio.open(fp) for fp in input_files]
112
- mosaic, out_transform = merge(
113
- srcs,
114
- nodata=65535,
115
- resampling=Resampling.nearest
116
- )
117
-
118
- meta = srcs[0].profile.copy()
119
- meta["transform"] = out_transform
120
- meta.update(
121
- height=mosaic.shape[1],
122
- width=mosaic.shape[2]
123
- )
124
- outname = original_dir / full_outname.name
125
- outname.parent.mkdir(parents=True, exist_ok=True)
126
- with rio.open(outname, "w", **meta) as dst:
127
- dst.write(mosaic)
128
-
129
- for src in srcs:
130
- src.close()
131
112
 
132
- # Delete a folder with pathlib
113
+ dir_path = full_outname_temp.parent / full_outname_temp.stem
114
+ if dir_path.exists():
115
+ input_files = sorted(dir_path.glob("*.tif"))
116
+ merge_tifs(input_files, full_outname)
133
117
  shutil.rmtree(dir_path)
134
118
  else:
135
- return outname
119
+ raise ValueError(f"Error in {full_outname}")
@@ -0,0 +1,121 @@
1
+ import ee
2
+ import re
3
+ from copy import deepcopy
4
+ from typing import Dict
5
+ import pathlib
6
+ import rasterio as rio
7
+ from rasterio.merge import merge
8
+ from rasterio.enums import Resampling
9
+
10
+
11
+
12
def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
    """Split *manifest* into a ``2**power`` x ``2**power`` grid of tile manifests.

    Each returned manifest is an independent deep copy of *manifest* whose
    ``grid.dimensions`` are set to ``cell_width`` x ``cell_height`` and whose
    ``grid.affineTransform`` translation is shifted to the tile's origin.
    The input manifest is never modified.

    Parameters
    ----------
    manifest : dict
        Manifest with ``grid.dimensions`` and ``grid.affineTransform``
        (``translateX``, ``translateY``, ``scaleX``, ``scaleY``) entries.
    cell_width, cell_height : int
        Size of every tile, in pixels.
    power : int
        Number of quad-split levels; the grid has ``2**power`` tiles per side.

    Returns
    -------
    list[dict]
        Tile manifests ordered row by row (the Y index varies slowest).

    Raises
    ------
    ValueError
        If *power* is negative.
    """
    # 2 ** negative is a float and would make range() fail with an opaque
    # TypeError, so reject it up front with a clear message.
    if power < 0:
        raise ValueError(f"power must be >= 0, got {power}")

    template = deepcopy(manifest)
    grid = template["grid"]
    grid["dimensions"]["width"] = cell_width
    grid["dimensions"]["height"] = cell_height

    affine = grid["affineTransform"]
    origin_x = affine["translateX"]
    origin_y = affine["translateY"]
    scale_x = affine["scaleX"]
    scale_y = affine["scaleY"]

    tiles_per_side = 2 ** power
    tiles = []
    for tile_row in range(tiles_per_side):
        for tile_col in range(tiles_per_side):
            # A fresh deep copy per tile keeps the nested dicts independent.
            tile = deepcopy(template)
            tile_affine = tile["grid"]["affineTransform"]
            tile_affine["translateX"] = origin_x + (tile_col * cell_width) * scale_x
            tile_affine["translateY"] = origin_y + (tile_row * cell_height) * scale_y
            tiles.append(tile)

    return tiles
34
+
35
def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int, int]:
    """Derive a tile size and split depth from an Earth Engine size error.

    The first two integers found in *ee_error_message* are taken as the
    requested amount and the allowed maximum, in that order. The split depth
    ``power`` is the smallest non-negative integer for which
    ``requested <= maximum * 4**power`` (each quad-split level divides a tile
    into four).

    Parameters
    ----------
    ee_error_message : str
        Text of the error; must contain at least two integers.
    size : int
        Edge length of the (square) image, in pixels.

    Returns
    -------
    tuple[int, int, int]
        ``(cell_width, cell_height, power)`` where both cell dimensions equal
        ``size // 2**power``.

    Raises
    ------
    ValueError
        If the message holds fewer than two integers or the maximum is zero.
    """
    numbers = re.findall(r"\d+", ee_error_message)
    if len(numbers) < 2:
        raise ValueError(
            "Expected two integers (requested size and limit) in error "
            f"message, got: {ee_error_message!r}"
        )

    image_pixel = int(numbers[0])
    max_pixel = int(numbers[1])
    if max_pixel == 0:
        raise ValueError(f"Size limit parsed from error message is zero: {ee_error_message!r}")

    # Integer comparison is exact, unlike the float ratio it replaces.
    power = 0
    while image_pixel > max_pixel * 4 ** power:
        power += 1

    cell_edge = size // 2 ** power
    return cell_edge, cell_edge, power
51
+
52
+
53
+
54
def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
    """Build a square `ee.Geometry` centred on (*lon*, *lat*).

    The centre point is buffered by half of ``edge_size * scale`` and the
    bounding box of that buffer is returned.
    """
    # Buffer radius is half the side length; units are presumably metres
    # (edge_size pixels x scale per pixel) -- confirm against the EE docs.
    radius = edge_size * scale / 2
    centre = ee.Geometry.Point([lon, lat])
    buffered = centre.buffer(radius)
    return buffered.bounds()
59
+
60
+
61
+
62
def merge_tifs(
    input_files: list[pathlib.Path],
    output_path: pathlib.Path,
    *,
    nodata: int = 65535,
    gdal_threads: int = 8
) -> None:
    """
    Merge a list of GeoTIFF files into a single mosaic and write it out.

    Parameters
    ----------
    input_files : list[Path]
        Paths to the GeoTIFF tiles to be merged. Any iterable of paths is
        accepted; it is materialised into a list before use.
    output_path : Path
        Destination path for the merged GeoTIFF. Parent directories are
        created if they do not exist.
    nodata : int, optional
        NoData value passed to the merge (default: 65535).
    gdal_threads : int, optional
        Value for the GDAL_NUM_THREADS / NUM_THREADS options (default: 8).

    Raises
    ------
    ValueError
        If `input_files` is empty.
    """
    from contextlib import ExitStack

    # Materialise first so a generator cannot slip past the emptiness check.
    input_files = list(input_files)
    if not input_files:
        raise ValueError("The input_files list is empty")

    # Ensure output path is a Path object and that its folder exists
    output_path = pathlib.Path(output_path).expanduser().resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Set GDAL threading environment
    with rio.Env(GDAL_NUM_THREADS=str(gdal_threads), NUM_THREADS=str(gdal_threads)):
        # The ExitStack closes every dataset opened so far, even when opening
        # a later tile (or the merge/write itself) raises.
        with ExitStack() as stack:
            srcs = [stack.enter_context(rio.open(fp)) for fp in input_files]

            # Merge sources into one mosaic
            mosaic, out_transform = merge(
                srcs,
                nodata=nodata,
                resampling=Resampling.nearest
            )

            # Copy metadata from the first source and update it
            meta = srcs[0].profile.copy()
            meta.update({
                "transform": out_transform,
                "height": mosaic.shape[1],
                "width": mosaic.shape[2]
            })

            # Write the merged mosaic to disk
            with rio.open(output_path, "w", **meta) as dst:
                dst.write(mosaic)
@@ -306,8 +306,8 @@ class RequestSet(BaseModel):
306
306
  "crsCode": meta.raster_transform.crs,
307
307
  },
308
308
  },
309
- "cs_cdf": int(meta.id.split("_")[-1]) / 100,
310
- "date": meta.id.split("_")[0],
309
+ # "cs_cdf": int(meta.id.split("_")[-1]) / 100,
310
+ # "date": meta.id.split("_")[0],
311
311
  "outname": f"{meta.id}.tif",
312
312
  }
313
313
 
@@ -31,7 +31,6 @@ def table_to_requestset(
31
31
  If *df* is empty after filtering.
32
32
 
33
33
  """
34
-
35
34
 
36
35
  df = table.copy()
37
36
 
@@ -47,19 +46,28 @@ def table_to_requestset(
47
46
  centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
48
47
  reqs: list[Request] = []
49
48
 
50
-
51
-
52
49
  if mosaic:
50
+
53
51
  grouped = (
54
- df.groupby('date')
52
+ df.groupby('date')
55
53
  .agg(
56
- id_list = ('id', list),
57
- cs_cdf_mean = ('cs_cdf', lambda x: int(round(x.mean(), 2) * 100))
54
+ id_list = ('id', list),
55
+ tiles = (
56
+ 'id',
57
+ lambda ids: ','.join(
58
+ sorted({i.split('_')[-1][1:] for i in ids})
59
+ )
60
+ ),
61
+ cs_cdf_mean = (
62
+ 'cs_cdf',
63
+ lambda x: int(round(x.mean(), 2) * 100)
64
+ )
58
65
  )
59
66
  )
60
67
 
61
68
  for day, row in grouped.iterrows():
62
69
 
70
+
63
71
  img_ids = row["id_list"]
64
72
  cdf = row["cs_cdf_mean"]
65
73
 
@@ -79,10 +87,11 @@ def table_to_requestset(
79
87
  )
80
88
  else:
81
89
  for img_id in img_ids:
82
- tile = img_id.split("_")[-1][1:]
90
+ # tile = img_id.split("_")[-1][1:]
83
91
  reqs.append(
84
92
  Request(
85
- id=f"{day}_{centre_hash}_{tile}_{cdf}",
93
+ # id=f"{day}_{centre_hash}_{tile}_{cdf}",
94
+ id=f"{day}_{centre_hash}_{cdf}",
86
95
  raster_transform=rt,
87
96
  image=f"{df.attrs['collection']}/{img_id}",
88
97
  bands=df.attrs["bands"],
@@ -91,13 +100,12 @@ def table_to_requestset(
91
100
  else:
92
101
  for _, row in df.iterrows():
93
102
  img_id = row["id"]
94
- tile = img_id.split("_")[-1][1:]
103
+ # tile = img_id.split("_")[-1][1:]
95
104
  day = row["date"]
96
105
  cdf = int(round(row["cs_cdf"], 2) * 100)
97
-
98
106
  reqs.append(
99
107
  Request(
100
- id=f"{day}_{centre_hash}_{tile}_{cdf}",
108
+ id=f"{day}_{centre_hash}_{cdf}",
101
109
  raster_transform=rt,
102
110
  image=f"{df.attrs['collection']}/{img_id}",
103
111
  bands=df.attrs["bands"],
@@ -1,12 +1,11 @@
1
1
  [tool.poetry]
2
2
  name = "cubexpress"
3
- version = "0.1.10"
3
+ version = "0.1.11"
4
4
  description = "Efficient processing of cubic Earth-observation (EO) data."
5
5
  authors = [
6
6
  "Julio Contreras <contrerasnetk@gmail.com>",
7
- "Cesar Aybar <csaybar@gmail.com>",
8
7
  ]
9
- license = "MIT"
8
+
10
9
  repository = "https://github.com/andesdatacube/cubexpress"
11
10
  documentation = "https://andesdatacube.github.io/cubexpress"
12
11
  readme = "README.md"
@@ -1,55 +0,0 @@
1
- import ee
2
- import re
3
- from copy import deepcopy
4
- from typing import Dict
5
-
6
-
7
- def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
8
- manifest_copy = deepcopy(manifest)
9
-
10
- manifest_copy["grid"]["dimensions"]["width"] = cell_width
11
- manifest_copy["grid"]["dimensions"]["height"] = cell_height
12
- x = manifest_copy["grid"]["affineTransform"]["translateX"]
13
- y = manifest_copy["grid"]["affineTransform"]["translateY"]
14
- scale_x = manifest_copy["grid"]["affineTransform"]["scaleX"]
15
- scale_y = manifest_copy["grid"]["affineTransform"]["scaleY"]
16
-
17
- manifests = []
18
-
19
- for columny in range(2**power):
20
- for rowx in range(2**power):
21
- new_x = x + (rowx * cell_width) * scale_x
22
- new_y = y + (columny * cell_height) * scale_y
23
- new_manifest = deepcopy(manifest_copy)
24
- new_manifest["grid"]["affineTransform"]["translateX"] = new_x
25
- new_manifest["grid"]["affineTransform"]["translateY"] = new_y
26
- manifests.append(new_manifest)
27
-
28
- return manifests
29
-
30
-
31
-
32
- def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
33
- match = re.findall(r'\d+', ee_error_message)
34
- image_pixel = int(match[0])
35
- max_pixel = int(match[1])
36
-
37
- images = image_pixel / max_pixel
38
- power = 0
39
-
40
- while images > 1:
41
- power += 1
42
- images = image_pixel / (max_pixel * 4 ** power)
43
-
44
- cell_width = size // 2 ** power
45
- cell_height = size // 2 ** power
46
-
47
- return cell_width, cell_height, power
48
-
49
-
50
-
51
- def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
52
- """Return a square `ee.Geometry` centred on (*lon*, *lat*)."""
53
- half = edge_size * scale / 2
54
- point = ee.Geometry.Point([lon, lat])
55
- return point.buffer(half).bounds()
File without changes
File without changes