cubexpress 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cubexpress might be problematic. Click here for more details.

cubexpress/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from cubexpress.conversion import lonlat2rt, geo2utm
2
- from cubexpress.geotyping import RasterTransform, Request, RequestSet
3
- from cubexpress.cloud_utils import s2_cloud_table
2
+ from cubexpress.geotyping import RasterTransform, Request, RequestSet, GeotransformDict
3
+ from cubexpress.cloud_utils import s2_table
4
4
  from cubexpress.cube import get_cube
5
5
  from cubexpress.request import table_to_requestset
6
6
 
@@ -11,15 +11,16 @@ from cubexpress.request import table_to_requestset
11
11
  __all__ = [
12
12
  "lonlat2rt",
13
13
  "RasterTransform",
14
+ "GeotransformDict",
14
15
  "Request",
15
16
  "RequestSet",
16
17
  "geo2utm",
17
18
  "get_cube",
18
- "s2_cloud_table",
19
+ "s2_table",
19
20
  "table_to_requestset"
20
21
  ]
21
22
 
22
- # Dynamic version import
23
- import importlib.metadata
23
+ # # Dynamic version import
24
+ # import importlib.metadata
24
25
 
25
- __version__ = importlib.metadata.version("cubexpress")
26
+ # __version__ = importlib.metadata.version("cubexpress")
cubexpress/cloud_utils.py CHANGED
@@ -15,9 +15,11 @@ from __future__ import annotations
15
15
  import datetime as dt
16
16
  import ee
17
17
  import pandas as pd
18
-
19
18
  from cubexpress.cache import _cache_key
19
+ import datetime as dt
20
20
  from cubexpress.geospatial import _square_roi
21
+ import warnings
22
+ warnings.filterwarnings('ignore', category=DeprecationWarning)
21
23
 
22
24
 
23
25
  def _cloud_table_single_range(
@@ -55,58 +57,64 @@ def _cloud_table_single_range(
55
57
 
56
58
  center = ee.Geometry.Point([lon, lat])
57
59
  roi = _square_roi(lon, lat, edge_size, 10)
58
-
60
+
59
61
  s2 = (
60
- ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
62
+ ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
61
63
  .filterBounds(roi)
62
64
  .filterDate(start, end)
63
65
  )
64
-
65
- csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
66
-
67
66
  ic = (
68
67
  s2
69
- .linkCollection(csp, ["cs_cdf"])
68
+ .linkCollection(
69
+ ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED"),
70
+ ["cs_cdf"]
71
+ )
70
72
  .select(["cs_cdf"])
71
73
  )
72
-
73
- # image IDs for every expected date
74
- ids = ic.aggregate_array("system:index").getInfo()
75
- df_ids = pd.DataFrame({"id": ids})
76
-
77
-
78
- region_scale = edge_size * 10 / 2
79
-
80
-
74
+ ids_inside = (
75
+ ic
76
+ .map(
77
+ lambda img: img.set(
78
+ 'roi_inside_scene',
79
+ img.geometry().contains(roi, maxError=10)
80
+ )
81
+ )
82
+ .filter(ee.Filter.eq('roi_inside_scene', True))
83
+ .aggregate_array('system:index')
84
+ .getInfo()
85
+ )
86
+
81
87
  try:
82
- raw = ic.getRegion(geometry=center, scale=region_scale).getInfo()
88
+ raw = ic.getRegion(
89
+ geometry=center,
90
+ scale=(edge_size) * 11
91
+ ).getInfo()
83
92
  except ee.ee_exception.EEException as e:
84
93
  if "No bands in collection" in str(e):
85
94
  return pd.DataFrame(
86
- columns=["id", "cs_cdf", "date", "null_flag"]
95
+ columns=["id", "longitude", "latitude", "time", "cs_cdf", "inside"]
87
96
  )
88
- raise
89
-
90
- df_raw = pd.DataFrame(raw[1:], columns=raw[0])
91
-
92
-
93
- df = (
94
- df_ids
95
- .merge(df_raw, on="id", how="left")
97
+ raise e
98
+
99
+ df_raw = (
100
+ pd.DataFrame(raw[1:], columns=raw[0])
101
+ .drop(columns=["longitude", "latitude"])
96
102
  .assign(
97
- date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d"),
98
- null_flag=lambda d: d["cs_cdf"].isna().astype(int),
103
+ date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d")
99
104
  )
100
- .drop(columns=["longitude", "latitude", "time"])
101
105
  )
102
-
103
- # fill missing scores with daily mean
104
- df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
105
-
106
- return df
107
-
108
-
109
- def s2_cloud_table(
106
+ df_raw["inside"] = df_raw["id"].isin(set(ids_inside)).astype(int)
107
+ df_raw['cs_cdf'] = df_raw.groupby('date').apply(
108
+ lambda group: group['cs_cdf'].transform(
109
+ lambda _: group[group['inside'] == 1]['cs_cdf'].iloc[0]
110
+ if (group['inside'] == 1).any()
111
+ else group['cs_cdf'].mean()
112
+ )
113
+ ).reset_index(drop=True)
114
+
115
+ return df_raw
116
+
117
+ def s2_table(
110
118
  lon: float,
111
119
  lat: float,
112
120
  edge_size: int,
@@ -114,8 +122,7 @@ def s2_cloud_table(
114
122
  end: str,
115
123
  max_cscore: float = 1.0,
116
124
  min_cscore: float = 0.0,
117
- cache: bool = False,
118
- verbose: bool = True,
125
+ cache: bool = False
119
126
  ) -> pd.DataFrame:
120
127
  """Build (and cache) a per-day cloud-table for the requested ROI.
121
128
 
@@ -144,9 +151,7 @@ def s2_cloud_table(
144
151
  Downstream path hint stored in ``result.attrs``; not used internally.
145
152
  cache
146
153
  Toggle parquet caching.
147
- verbose
148
- If *True* prints cache info/progress.
149
-
154
+
150
155
  Returns
151
156
  -------
152
157
  pandas.DataFrame
@@ -158,10 +163,9 @@ def s2_cloud_table(
158
163
  scale = 10
159
164
  cache_file = _cache_key(lon, lat, edge_size, scale, collection)
160
165
 
161
- # ─── 1. Load cached data if present ────────────────────────────────────
166
+ # Load cached data if present
162
167
  if cache and cache_file.exists():
163
- if verbose:
164
- print("📂 Loading cached metadata …")
168
+ print("📂 Loading cached metadata …")
165
169
  df_cached = pd.read_parquet(cache_file)
166
170
  have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()
167
171
 
@@ -172,8 +176,7 @@ def s2_cloud_table(
172
176
  dt.date.fromisoformat(start) >= cached_start
173
177
  and dt.date.fromisoformat(end) <= cached_end
174
178
  ):
175
- if verbose:
176
- print("✅ Served entirely from metadata.")
179
+ print("✅ Served entirely from metadata.")
177
180
  df_full = df_cached
178
181
  else:
179
182
  # Identify missing segments and fetch only those.
@@ -182,14 +185,22 @@ def s2_cloud_table(
182
185
  a1, b1 = start, cached_start.isoformat()
183
186
  df_new_parts.append(
184
187
  _cloud_table_single_range(
185
- lon, lat, edge_size, a1, b1
188
+ lon=lon,
189
+ lat=lat,
190
+ edge_size=edge_size,
191
+ start=a1,
192
+ end=b1
186
193
  )
187
194
  )
188
195
  if dt.date.fromisoformat(end) > cached_end:
189
196
  a2, b2 = cached_end.isoformat(), end
190
197
  df_new_parts.append(
191
198
  _cloud_table_single_range(
192
- lon, lat, edge_size, a2, b2
199
+ lon=lon,
200
+ lat=lat,
201
+ edge_size=edge_size,
202
+ start=a2,
203
+ end=b2
193
204
  )
194
205
  )
195
206
  df_new_parts = [df for df in df_new_parts if not df.empty]
@@ -204,21 +215,20 @@ def s2_cloud_table(
204
215
  else:
205
216
  df_full = df_cached
206
217
  else:
207
-
208
- if verbose:
209
- msg = "Generating metadata (no cache found)…" if cache else "Generating metadata…"
210
- print("⏳", msg)
218
+ print("⏳ Generating metadata…")
211
219
  df_full = _cloud_table_single_range(
212
- lon, lat, edge_size, start, end
220
+ lon=lon,
221
+ lat=lat,
222
+ edge_size=edge_size,
223
+ start=start,
224
+ end=end
213
225
  )
214
-
215
226
 
216
- # ─── 2. Save cache ─────────────────────────────────────────────────────
227
+ # Save cache
217
228
  if cache:
218
229
  df_full.to_parquet(cache_file, compression="zstd")
219
230
 
220
- # ─── 3. Filter by cloud cover and requested date window ────────────────
221
-
231
+ # Filter by cloud cover and requested date window
222
232
  result = (
223
233
  df_full.query("@start <= date <= @end")
224
234
  .query("@min_cscore <= cs_cdf <= @max_cscore")
cubexpress/cube.py CHANGED
@@ -14,23 +14,23 @@ The core download/split logic lives in *cubexpress.downloader* and
14
14
  from __future__ import annotations
15
15
 
16
16
  import pathlib
17
- import concurrent.futures
17
+ from concurrent.futures import ThreadPoolExecutor, as_completed
18
18
  from typing import Dict, Any
19
19
  import ee
20
+ from tqdm import tqdm
20
21
 
21
22
 
22
23
  from cubexpress.downloader import download_manifest, download_manifests
23
24
  from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
24
25
  from cubexpress.request import table_to_requestset
25
26
  import pandas as pd
27
+ from cubexpress.geotyping import RequestSet
26
28
 
27
29
 
28
30
  def get_geotiff(
29
31
  manifest: Dict[str, Any],
30
32
  full_outname: pathlib.Path | str,
31
- join: bool = True,
32
- nworks: int = 4,
33
- verbose: bool = True,
33
+ nworks: int = 4
34
34
  ) -> None:
35
35
  """Download *manifest* to *full_outname*, retrying with tiled requests.
36
36
 
@@ -43,28 +43,27 @@ def get_geotiff(
43
43
  nworks
44
44
  Maximum worker threads when the image must be split; default **4**.
45
45
  """
46
- full_outname = pathlib.Path(full_outname)
46
+
47
47
  try:
48
- download_manifest(manifest, full_outname)
48
+ download_manifest(
49
+ ulist=manifest,
50
+ full_outname=full_outname
51
+ )
49
52
  except ee.ee_exception.EEException as err:
50
-
51
- size = manifest["grid"]["dimensions"]["width"] # square images assumed
53
+ size = manifest["grid"]["dimensions"]["width"]
52
54
  cell_w, cell_h, power = calculate_cell_size(str(err), size)
53
55
  tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
54
- download_manifests(tiled, full_outname, join, nworks)
55
-
56
- if verbose:
57
- print(f"Downloaded {full_outname}")
58
-
56
+
57
+ download_manifests(
58
+ manifests=tiled,
59
+ full_outname=full_outname,
60
+ max_workers=nworks
61
+ )
59
62
 
60
63
  def get_cube(
61
- table: pd.DataFrame,
64
+ requests: pd.DataFrame | RequestSet,
62
65
  outfolder: pathlib.Path | str,
63
- mosaic: bool = True,
64
- join: bool = True,
65
- nworks: int = 4,
66
- verbose: bool = True,
67
- cache: bool = True
66
+ nworks: int = 4
68
67
  ) -> None:
69
68
  """Download every request in *requests* to *outfolder* using a thread pool.
70
69
 
@@ -80,40 +79,22 @@ def get_cube(
80
79
  nworks
81
80
  Pool size for concurrent downloads; default **4**.
82
81
  """
83
-
84
- requests = table_to_requestset(
85
- table=table,
86
- mosaic=mosaic
87
- )
88
82
 
89
83
  outfolder = pathlib.Path(outfolder).expanduser().resolve()
90
-
91
- with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
92
- futures = []
93
- for _, row in requests._dataframe.iterrows():
94
- outname = pathlib.Path(outfolder) / f"{row.id}.tif"
95
- if outname.exists() and cache:
96
- continue
97
- outname.parent.mkdir(parents=True, exist_ok=True)
98
- futures.append(
99
- pool.submit(
100
- get_geotiff,
101
- row.manifest,
102
- outname,
103
- join,
104
- nworks,
105
- verbose
106
- )
107
- )
108
-
109
- for fut in concurrent.futures.as_completed(futures):
84
+ outfolder.mkdir(parents=True, exist_ok=True)
85
+ dataframe = requests._dataframe if isinstance(requests, RequestSet) else requests
86
+
87
+ with ThreadPoolExecutor(max_workers=nworks) as executor:
88
+ futures = {
89
+ executor.submit(
90
+ get_geotiff,
91
+ manifest=row.manifest,
92
+ full_outname=pathlib.Path(outfolder) / f"{row.id}.tif",
93
+ nworks=nworks
94
+ ): row.id for _, row in dataframe.iterrows()
95
+ }
96
+ for future in tqdm(as_completed(futures), total=len(futures)):
110
97
  try:
111
- fut.result()
112
- except Exception as exc: # noqa: BLE001 – log and keep going
113
- print(f"Download error: {exc}")
114
-
115
- download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
116
- download_df["outname"] = outfolder / requests._dataframe["outname"]
117
- download_df.rename(columns={"outname": "full_outname"}, inplace=True)
118
-
119
- return download_df
98
+ future.result()
99
+ except Exception as exc:
100
+ print(f"Download error for {futures[future]}: {exc}")
cubexpress/downloader.py CHANGED
@@ -13,24 +13,26 @@ from __future__ import annotations
13
13
 
14
14
  import json
15
15
  import pathlib
16
- import concurrent.futures
16
+ from concurrent.futures import ThreadPoolExecutor, as_completed
17
17
  from copy import deepcopy
18
- from typing import Any, Dict, List
18
+ from typing import Any, Dict
19
19
 
20
20
  import ee
21
21
  import rasterio as rio
22
22
  from rasterio.io import MemoryFile
23
23
  import logging
24
- from rasterio.merge import merge
25
- from rasterio.enums import Resampling
26
24
  import os
27
25
  import shutil
28
26
  import tempfile
27
+ from cubexpress.geospatial import merge_tifs
29
28
 
30
29
  os.environ['CPL_LOG_ERRORS'] = 'OFF'
31
30
  logging.getLogger('rasterio._env').setLevel(logging.ERROR)
32
31
 
33
- def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None:
32
+ def download_manifest(
33
+ ulist: Dict[str, Any],
34
+ full_outname: pathlib.Path
35
+ ) -> None:
34
36
  """Download *ulist* and save it as *full_outname*.
35
37
 
36
38
  The manifest must include either an ``assetId`` or an ``expression``
@@ -44,36 +46,38 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
44
46
  ulist_deep = deepcopy(ulist)
45
47
  ulist_deep["expression"] = ee_image
46
48
  images_bytes = ee.data.computePixels(ulist_deep)
47
- else: # pragma: no cover
49
+ else:
48
50
  raise ValueError("Manifest does not contain 'assetId' or 'expression'")
49
-
50
- with MemoryFile(images_bytes) as memfile:
51
- with memfile.open() as src:
52
- profile = src.profile
53
- profile.update(
54
- driver="GTiff",
55
- tiled=True,
56
- interleave="band",
57
- blockxsize=256, # TODO: Creo que es 128 (por de la superresolucion)
58
- blockysize=256,
59
- compress="ZSTD",
60
- # zstd_level=13,
61
- predictor=2,
62
- num_threads=20,
63
- nodata=65535,
64
- dtype="uint16",
65
- count=13,
66
- photometric="MINISBLACK"
67
- )
68
-
69
- with rio.open(full_outname, "w", **profile) as dst:
70
- dst.write(src.read())
51
+
52
+ with open(full_outname, "wb") as src:
53
+ src.write(images_bytes)
54
+
55
+ # with MemoryFile(images_bytes) as memfile:
56
+ # with memfile.open() as src:
57
+ # profile = src.profile
58
+ # profile.update(
59
+ # driver="GTiff",
60
+ # tiled=True,
61
+ # interleave="band",
62
+ # blockxsize=256,
63
+ # blockysize=256,
64
+ # compress="ZSTD",
65
+ # zstd_level=13,
66
+ # predictor=2,
67
+ # num_threads=20,
68
+ # nodata=65535,
69
+ # dtype="uint16",
70
+ # count=12,
71
+ # photometric="MINISBLACK"
72
+ # )
73
+
74
+ # with rio.open(full_outname, "w", **profile) as dst:
75
+ # dst.write(src.read())
71
76
 
72
77
  def download_manifests(
73
78
  manifests: list[Dict[str, Any]],
74
79
  full_outname: pathlib.Path,
75
- join: bool = True,
76
- max_workers: int = 4,
80
+ max_workers: int,
77
81
  ) -> None:
78
82
  """Download every manifest in *manifests* concurrently.
79
83
 
@@ -81,55 +85,27 @@ def download_manifests(
81
85
  ``full_outname.parent/full_outname.stem`` with names ``000000.tif``,
82
86
  ``000001.tif`` … according to the list order.
83
87
  """
84
- # full_outname = pathlib.Path("/home/contreras/Documents/GitHub/cubexpress/cubexpress_test/2017-08-19_6mfrw_18LVN.tif")
85
- original_dir = full_outname.parent
86
- if join:
87
- tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="s2tmp_"))
88
- full_outname = tmp_dir / full_outname.name
89
-
90
- with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
91
- futures = []
92
-
93
- for index, umanifest in enumerate(manifests):
94
- folder = full_outname.parent / full_outname.stem
95
- folder.mkdir(parents=True, exist_ok=True)
96
- outname = folder / f"{index:06d}.tif"
97
- futures.append(executor.submit(download_manifest, umanifest, outname))
98
-
99
- for fut in concurrent.futures.as_completed(futures):
88
+ tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="cubexpress_"))
89
+ full_outname_temp = tmp_dir / full_outname.stem
90
+ full_outname_temp.mkdir(parents=True, exist_ok=True)
91
+
92
+ with ThreadPoolExecutor(max_workers=max_workers) as exe: # -
93
+ futures = {
94
+ exe.submit(
95
+ download_manifest,
96
+ ulist=umanifest,
97
+ full_outname=full_outname_temp / f"{index:06d}.tif"
98
+ ): umanifest for index, umanifest in enumerate(manifests)
99
+ }
100
+ for future in as_completed(futures):
100
101
  try:
101
- fut.result()
102
- except Exception as exc: # noqa: BLE001
103
- print(f"Error en una de las descargas: {exc}") # noqa: T201
104
-
105
- dir_path = full_outname.parent / full_outname.stem
106
- input_files = sorted(dir_path.glob("*.tif"))
107
-
108
- if dir_path.exists() and len(input_files) > 1:
109
-
110
- with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
111
- srcs = [rio.open(fp) for fp in input_files]
112
- mosaic, out_transform = merge(
113
- srcs,
114
- nodata=65535,
115
- resampling=Resampling.nearest
116
- )
117
-
118
- meta = srcs[0].profile.copy()
119
- meta["transform"] = out_transform
120
- meta.update(
121
- height=mosaic.shape[1],
122
- width=mosaic.shape[2]
123
- )
124
- outname = original_dir / full_outname.name
125
- outname.parent.mkdir(parents=True, exist_ok=True)
126
- with rio.open(outname, "w", **meta) as dst:
127
- dst.write(mosaic)
128
-
129
- for src in srcs:
130
- src.close()
131
-
132
- # Delete a folder with pathlib
133
- shutil.rmtree(dir_path)
102
+ future.result()
103
+ except Exception as exc:
104
+ print(f"Error in one of the downloads: {exc}")
105
+
106
+ if full_outname_temp.exists():
107
+ input_files = sorted(full_outname_temp.glob("*.tif"))
108
+ merge_tifs(input_files, full_outname)
109
+ shutil.rmtree(full_outname_temp)
134
110
  else:
135
- return outname
111
+ raise ValueError(f"Error in {full_outname}")
cubexpress/geospatial.py CHANGED
@@ -2,6 +2,11 @@ import ee
2
2
  import re
3
3
  from copy import deepcopy
4
4
  from typing import Dict
5
+ import pathlib
6
+ import rasterio as rio
7
+ from rasterio.merge import merge
8
+ from rasterio.enums import Resampling
9
+
5
10
 
6
11
 
7
12
  def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
@@ -27,8 +32,6 @@ def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power:
27
32
 
28
33
  return manifests
29
34
 
30
-
31
-
32
35
  def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
33
36
  match = re.findall(r'\d+', ee_error_message)
34
37
  image_pixel = int(match[0])
@@ -53,3 +56,64 @@ def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geomet
53
56
  half = edge_size * scale / 2
54
57
  point = ee.Geometry.Point([lon, lat])
55
58
  return point.buffer(half).bounds()
59
+
60
def merge_tifs(
    input_files: list[pathlib.Path],
    output_path: pathlib.Path,
    *,
    nodata: int = 65535,
    gdal_threads: int = 8
) -> None:
    """
    Merge a list of GeoTIFF files into a single mosaic and write it out.

    Parameters
    ----------
    input_files : list[Path]
        Paths to the GeoTIFF tiles to be merged.
    output_path : Path
        Destination path for the merged GeoTIFF. It is expanded and
        resolved, and its parent directory is created if missing.
    nodata : int, optional
        NoData value passed to the merge call (default: 65535).
    gdal_threads : int, optional
        Value set for the ``GDAL_NUM_THREADS`` and ``NUM_THREADS``
        options of the rasterio environment (default: 8).

    Raises
    ------
    ValueError
        If `input_files` is empty.
    """
    # Function-scope import so the block does not depend on a file-level
    # import being added elsewhere.
    from contextlib import ExitStack

    if not input_files:
        raise ValueError("The input_files list is empty")

    # Ensure output path is a Path object and that its folder exists
    output_path = pathlib.Path(output_path).expanduser().resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    threads = str(gdal_threads)

    # Every opened dataset is registered on the ExitStack as soon as it is
    # opened, so if opening a later tile fails the earlier ones are still
    # closed (a plain list comprehension outside a try block would leak them).
    with rio.Env(GDAL_NUM_THREADS=threads, NUM_THREADS=threads), ExitStack() as stack:
        srcs = [stack.enter_context(rio.open(fp)) for fp in input_files]

        # Merge sources into one mosaic
        mosaic, out_transform = merge(
            srcs,
            nodata=nodata,
            resampling=Resampling.nearest
        )

        # Copy metadata from the first source and update only the georeferencing
        # and size. NOTE(review): the profile's own "nodata" entry is left as
        # declared by the first tile; confirm whether it should also be set
        # to `nodata`.
        meta = srcs[0].profile.copy()
        meta.update({
            "transform": out_transform,
            "height": mosaic.shape[1],
            "width": mosaic.shape[2]
        })

        # Write the merged mosaic to disk
        with rio.open(output_path, "w", **meta) as dst:
            dst.write(mosaic)
cubexpress/geotyping.py CHANGED
@@ -259,13 +259,8 @@ class RequestSet(BaseModel):
259
259
  def create_manifests(self) -> pd.DataFrame:
260
260
  """
261
261
  Exports the raster metadata to a pandas DataFrame.
262
-
263
262
  Returns:
264
263
  pd.DataFrame: A DataFrame containing the metadata for all entries.
265
-
266
- Example:
267
- >>> df = raster_transform_set.export_df()
268
- >>> print(df)
269
264
  """
270
265
  # Use ProcessPoolExecutor for CPU-bound tasks to convert raster transforms to lon/lat
271
266
  with ProcessPoolExecutor(max_workers=None) as executor:
@@ -306,8 +301,8 @@ class RequestSet(BaseModel):
306
301
  "crsCode": meta.raster_transform.crs,
307
302
  },
308
303
  },
309
- "cs_cdf": int(meta.id.split("_")[-1]) / 100,
310
- "date": meta.id.split("_")[0],
304
+ # "cs_cdf": int(meta.id.split("_")[-1]) / 100,
305
+ # "date": meta.id.split("_")[0],
311
306
  "outname": f"{meta.id}.tif",
312
307
  }
313
308
 
cubexpress/request.py CHANGED
@@ -11,9 +11,9 @@ from cubexpress.conversion import lonlat2rt
11
11
 
12
12
 
13
13
  def table_to_requestset(
14
- table: pd.DataFrame,
15
- mosaic: bool = True
16
- ) -> RequestSet:
14
+ table: pd.DataFrame,
15
+ mosaic: bool = True
16
+ ) -> RequestSet:
17
17
  """Return a :class:`RequestSet` built from *df* (cloud_table result).
18
18
 
19
19
  Parameters
@@ -31,12 +31,11 @@ def table_to_requestset(
31
31
  If *df* is empty after filtering.
32
32
 
33
33
  """
34
-
35
34
 
36
35
  df = table.copy()
37
36
 
38
37
  if df.empty:
39
- raise ValueError("cloud_table returned no rows; nothing to request.")
38
+ raise ValueError("There are no images in the requested period. Please check your dates or your ubication.")
40
39
 
41
40
  rt = lonlat2rt(
42
41
  lon=df.attrs["lon"],
@@ -44,22 +43,30 @@ def table_to_requestset(
44
43
  edge_size=df.attrs["edge_size"],
45
44
  scale=df.attrs["scale"],
46
45
  )
46
+
47
47
  centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
48
- reqs: list[Request] = []
49
-
50
-
48
+ reqs = []
51
49
 
52
50
  if mosaic:
53
51
  grouped = (
54
- df.groupby('date')
52
+ df.groupby('date')
55
53
  .agg(
56
- id_list = ('id', list),
57
- cs_cdf_mean = ('cs_cdf', lambda x: int(round(x.mean(), 2) * 100))
54
+ id_list = ('id', list),
55
+ tiles = (
56
+ 'id',
57
+ lambda ids: ','.join(
58
+ sorted({i.split('_')[-1][1:] for i in ids})
59
+ )
60
+ ),
61
+ cs_cdf_mean = (
62
+ 'cs_cdf',
63
+ lambda x: int(round(x.mean(), 2) * 100)
64
+ )
58
65
  )
59
66
  )
60
67
 
61
68
  for day, row in grouped.iterrows():
62
-
69
+
63
70
  img_ids = row["id_list"]
64
71
  cdf = row["cs_cdf_mean"]
65
72
 
@@ -79,10 +86,11 @@ def table_to_requestset(
79
86
  )
80
87
  else:
81
88
  for img_id in img_ids:
82
- tile = img_id.split("_")[-1][1:]
89
+ # tile = img_id.split("_")[-1][1:]
83
90
  reqs.append(
84
91
  Request(
85
- id=f"{day}_{centre_hash}_{tile}_{cdf}",
92
+ # id=f"{day}_{centre_hash}_{tile}_{cdf}",
93
+ id=f"{day}_{centre_hash}_{cdf}",
86
94
  raster_transform=rt,
87
95
  image=f"{df.attrs['collection']}/{img_id}",
88
96
  bands=df.attrs["bands"],
@@ -94,14 +102,13 @@ def table_to_requestset(
94
102
  tile = img_id.split("_")[-1][1:]
95
103
  day = row["date"]
96
104
  cdf = int(round(row["cs_cdf"], 2) * 100)
97
-
98
105
  reqs.append(
99
106
  Request(
100
- id=f"{day}_{centre_hash}_{tile}_{cdf}",
107
+ id=f"{day}_{tile}_{cdf}",
101
108
  raster_transform=rt,
102
109
  image=f"{df.attrs['collection']}/{img_id}",
103
110
  bands=df.attrs["bands"],
104
111
  )
105
112
  )
106
113
 
107
- return RequestSet(requestset=reqs)
114
+ return RequestSet(requestset=reqs)
@@ -1,9 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cubexpress
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: Efficient processing of cubic Earth-observation (EO) data.
5
5
  Home-page: https://github.com/andesdatacube/cubexpress
6
- License: MIT
7
6
  Keywords: earth-engine,sentinel-2,geospatial,eo,cube
8
7
  Author: Julio Contreras
9
8
  Author-email: contrerasnetk@gmail.com
@@ -32,7 +31,7 @@ Description-Content-Type: text/markdown
32
31
  <h1></h1>
33
32
 
34
33
  <p align="center">
35
- <img src="./docs/logo_cubexpress.png" width="39%">
34
+ <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
36
35
  </p>
37
36
 
38
37
  <p align="center">
@@ -0,0 +1,13 @@
1
+ cubexpress/__init__.py,sha256=G_9FKWrdFh44oYjD78jtH9gXqrdARIFFM0MaSl6WttY,606
2
+ cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
3
+ cubexpress/cloud_utils.py,sha256=EhLqHrefia7AUp75HZXKrQSPKQb6BSYFKD3hjI_sj1M,7681
4
+ cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
5
+ cubexpress/cube.py,sha256=lqJJyf1EmNYszIztx62OCrdo0HGtIveOmw-pFGI2nuI,3230
6
+ cubexpress/downloader.py,sha256=rnk-oX51_YFWz1iZuBWEYTDSTV48F780o1aujTsKCwE,3725
7
+ cubexpress/geospatial.py,sha256=jldZ-aFqUEvp1SF8ZJEa-pDHSAs3akzqk43dHai0TKM,3820
8
+ cubexpress/geotyping.py,sha256=uTaoZnaegGUShAHy-t0og22vpBbSrnJhCxp_4UoO9TM,16597
9
+ cubexpress/request.py,sha256=gLY8eBkMVal0uF6auGIfqmOhUPWFHwJzBxXOXU4aRuA,3370
10
+ cubexpress-0.1.12.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
11
+ cubexpress-0.1.12.dist-info/METADATA,sha256=u1EpUC4n5Jdpewvjv67l2IWp9ULCqRzshPqNTY85Tcs,9724
12
+ cubexpress-0.1.12.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
+ cubexpress-0.1.12.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- cubexpress/__init__.py,sha256=RjyAqwiD0rU_Z5tCJTYNGKXZ1ggpfPB51wzhr0KwweY,570
2
- cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
3
- cubexpress/cloud_utils.py,sha256=Vr2A1SZDKP_2xNiLYgwmWOUX8P8I-pXQrxBETiUDq60,7441
4
- cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
5
- cubexpress/cube.py,sha256=tU0lqhtQUwEiz33yebYIbw-a0R4zmTAei-b_xqMIcWU,3719
6
- cubexpress/downloader.py,sha256=gHVNCNTwK9qA5MPaEHB_m0wOPprw010qaTVnszwbuUk,4668
7
- cubexpress/geospatial.py,sha256=ZbsPIgsYQFnNFXUuQ136rJsL4b2Bf91o0Vsswby2dFc,1812
8
- cubexpress/geotyping.py,sha256=XoSXQuoq5CfzKndM2Pko5KXIP0vxGNm02LOOMbCWkrs,16692
9
- cubexpress/request.py,sha256=jy5K9MQEurNlwhF0izFmoIh3o7m9bC97fsTT_7C7Gv0,3051
10
- cubexpress-0.1.10.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
11
- cubexpress-0.1.10.dist-info/METADATA,sha256=Tn_XBaLWbO4xbmzYJCM6vnbwRNZ1d1ABZ6uF4G4REYM,9664
12
- cubexpress-0.1.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
- cubexpress-0.1.10.dist-info/RECORD,,