cubexpress 0.1.8__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cubexpress might be problematic. Click here for more details.
- {cubexpress-0.1.8 → cubexpress-0.1.9}/PKG-INFO +1 -1
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/__init__.py +2 -2
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/cloud_utils.py +28 -19
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/cube.py +25 -9
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/downloader.py +45 -5
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/geotyping.py +1 -1
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/request.py +17 -15
- {cubexpress-0.1.8 → cubexpress-0.1.9}/pyproject.toml +1 -1
- {cubexpress-0.1.8 → cubexpress-0.1.9}/LICENSE +0 -0
- {cubexpress-0.1.8 → cubexpress-0.1.9}/README.md +0 -0
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/cache.py +0 -0
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/conversion.py +0 -0
- {cubexpress-0.1.8 → cubexpress-0.1.9}/cubexpress/geospatial.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from cubexpress.conversion import lonlat2rt, geo2utm
|
|
2
2
|
from cubexpress.geotyping import RasterTransform, Request, RequestSet
|
|
3
|
-
from cubexpress.cloud_utils import
|
|
3
|
+
from cubexpress.cloud_utils import s2_cloud_table
|
|
4
4
|
from cubexpress.cube import get_cube
|
|
5
5
|
from cubexpress.request import table_to_requestset
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ __all__ = [
|
|
|
15
15
|
"RequestSet",
|
|
16
16
|
"geo2utm",
|
|
17
17
|
"get_cube",
|
|
18
|
-
"
|
|
18
|
+
"s2_cloud_table",
|
|
19
19
|
"table_to_requestset"
|
|
20
20
|
]
|
|
21
21
|
|
|
@@ -27,25 +27,30 @@ def _cloud_table_single_range(
|
|
|
27
27
|
start: str,
|
|
28
28
|
end: str
|
|
29
29
|
) -> pd.DataFrame:
|
|
30
|
-
"""
|
|
30
|
+
"""
|
|
31
|
+
Build a daily cloud-score table for a square Sentinel-2 footprint.
|
|
31
32
|
|
|
32
33
|
Parameters
|
|
33
34
|
----------
|
|
34
|
-
lon, lat
|
|
35
|
-
|
|
36
|
-
edge_size
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
ISO-dates (``YYYY-MM-DD``) delimiting the query.
|
|
41
|
-
collection
|
|
42
|
-
Sentinel-2 collection name to query.
|
|
35
|
+
lon, lat : float
|
|
36
|
+
Point at the centre of the requested region (°).
|
|
37
|
+
edge_size : int
|
|
38
|
+
Side length of the square region in Sentinel-2 pixels (10 m each).
|
|
39
|
+
start, end : str
|
|
40
|
+
ISO-8601 dates delimiting the period, e.g. ``"2024-06-01"``.
|
|
43
41
|
|
|
44
42
|
Returns
|
|
45
43
|
-------
|
|
46
44
|
pandas.DataFrame
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
One row per image with columns:
|
|
46
|
+
* ``id`` – Sentinel-2 ID
|
|
47
|
+
* ``cs_cdf`` – Cloud Score Plus CDF (0–1)
|
|
48
|
+
* ``date`` – acquisition date (YYYY-MM-DD)
|
|
49
|
+
* ``high_null_flag`` – 1 if cloud score missing
|
|
50
|
+
|
|
51
|
+
Notes
|
|
52
|
+
-----
|
|
53
|
+
Missing ``cs_cdf`` values are filled with the mean of the same day.
|
|
49
54
|
"""
|
|
50
55
|
|
|
51
56
|
center = ee.Geometry.Point([lon, lat])
|
|
@@ -64,6 +69,8 @@ def _cloud_table_single_range(
|
|
|
64
69
|
.linkCollection(csp, ["cs_cdf"])
|
|
65
70
|
.select(["cs_cdf"])
|
|
66
71
|
)
|
|
72
|
+
|
|
73
|
+
# image IDs for every expected date
|
|
67
74
|
ids = ic.aggregate_array("system:index").getInfo()
|
|
68
75
|
df_ids = pd.DataFrame({"id": ids})
|
|
69
76
|
|
|
@@ -93,6 +100,7 @@ def _cloud_table_single_range(
|
|
|
93
100
|
.drop(columns=["longitude", "latitude", "time"])
|
|
94
101
|
)
|
|
95
102
|
|
|
103
|
+
# fill missing scores with daily mean
|
|
96
104
|
df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
|
|
97
105
|
|
|
98
106
|
return df
|
|
@@ -101,11 +109,12 @@ def _cloud_table_single_range(
|
|
|
101
109
|
def s2_cloud_table(
|
|
102
110
|
lon: float,
|
|
103
111
|
lat: float,
|
|
104
|
-
edge_size: int
|
|
105
|
-
start: str
|
|
106
|
-
end: str
|
|
107
|
-
|
|
108
|
-
|
|
112
|
+
edge_size: int,
|
|
113
|
+
start: str,
|
|
114
|
+
end: str,
|
|
115
|
+
max_cscore: float = 1.0,
|
|
116
|
+
min_cscore: float = 0.0,
|
|
117
|
+
cache: bool = False,
|
|
109
118
|
verbose: bool = True,
|
|
110
119
|
) -> pd.DataFrame:
|
|
111
120
|
"""Build (and cache) a per-day cloud-table for the requested ROI.
|
|
@@ -206,7 +215,7 @@ def s2_cloud_table(
|
|
|
206
215
|
|
|
207
216
|
result = (
|
|
208
217
|
df_full.query("@start <= date <= @end")
|
|
209
|
-
.query("cs_cdf
|
|
218
|
+
.query("@min_cscore <= cs_cdf <= @max_cscore")
|
|
210
219
|
.reset_index(drop=True)
|
|
211
220
|
)
|
|
212
221
|
|
|
@@ -221,4 +230,4 @@ def s2_cloud_table(
|
|
|
221
230
|
"collection": collection
|
|
222
231
|
}
|
|
223
232
|
)
|
|
224
|
-
return result
|
|
233
|
+
return result
|
|
@@ -16,17 +16,22 @@ from __future__ import annotations
|
|
|
16
16
|
import pathlib
|
|
17
17
|
import concurrent.futures
|
|
18
18
|
from typing import Dict, Any
|
|
19
|
-
|
|
20
19
|
import ee
|
|
20
|
+
|
|
21
|
+
|
|
21
22
|
from cubexpress.downloader import download_manifest, download_manifests
|
|
22
23
|
from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
|
|
23
|
-
from cubexpress.
|
|
24
|
+
from cubexpress.request import table_to_requestset
|
|
25
|
+
import pandas as pd
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
def get_geotiff(
|
|
27
29
|
manifest: Dict[str, Any],
|
|
28
30
|
full_outname: pathlib.Path | str,
|
|
31
|
+
join: bool = True,
|
|
32
|
+
eraser: bool = True,
|
|
29
33
|
nworks: int = 4,
|
|
34
|
+
verbose: bool = True,
|
|
30
35
|
) -> None:
|
|
31
36
|
"""Download *manifest* to *full_outname*, retrying with tiled requests.
|
|
32
37
|
|
|
@@ -39,19 +44,26 @@ def get_geotiff(
|
|
|
39
44
|
nworks
|
|
40
45
|
Maximum worker threads when the image must be split; default **4**.
|
|
41
46
|
"""
|
|
47
|
+
full_outname = pathlib.Path(full_outname)
|
|
42
48
|
try:
|
|
43
|
-
download_manifest(manifest,
|
|
49
|
+
download_manifest(manifest, full_outname)
|
|
44
50
|
except ee.ee_exception.EEException as err:
|
|
45
|
-
|
|
51
|
+
|
|
46
52
|
size = manifest["grid"]["dimensions"]["width"] # square images assumed
|
|
47
53
|
cell_w, cell_h, power = calculate_cell_size(str(err), size)
|
|
48
54
|
tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
|
|
49
|
-
download_manifests(tiled,
|
|
55
|
+
download_manifests(tiled, full_outname, join, eraser, nworks)
|
|
56
|
+
|
|
57
|
+
if verbose:
|
|
58
|
+
print(f"Downloaded {full_outname}")
|
|
50
59
|
|
|
51
60
|
|
|
52
61
|
def get_cube(
|
|
53
|
-
|
|
62
|
+
table: pd.DataFrame,
|
|
54
63
|
outfolder: pathlib.Path | str,
|
|
64
|
+
join: bool = True,
|
|
65
|
+
eraser: bool = True,
|
|
66
|
+
mosaic: bool = True,
|
|
55
67
|
nworks: int = 4,
|
|
56
68
|
) -> None:
|
|
57
69
|
"""Download every request in *requests* to *outfolder* using a thread pool.
|
|
@@ -68,14 +80,18 @@ def get_cube(
|
|
|
68
80
|
nworks
|
|
69
81
|
Pool size for concurrent downloads; default **4**.
|
|
70
82
|
"""
|
|
71
|
-
|
|
83
|
+
|
|
84
|
+
requests = table_to_requestset(
|
|
85
|
+
table=table,
|
|
86
|
+
mosaic=mosaic
|
|
87
|
+
)
|
|
72
88
|
|
|
73
89
|
with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
|
|
74
90
|
futures = []
|
|
75
91
|
for _, row in requests._dataframe.iterrows():
|
|
76
|
-
outname =
|
|
92
|
+
outname = pathlib.Path(outfolder) / f"{row.id}.tif"
|
|
77
93
|
outname.parent.mkdir(parents=True, exist_ok=True)
|
|
78
|
-
futures.append(pool.submit(get_geotiff, row.manifest, outname, nworks))
|
|
94
|
+
futures.append(pool.submit(get_geotiff, row.manifest, outname, join, eraser, nworks))
|
|
79
95
|
|
|
80
96
|
for fut in concurrent.futures.as_completed(futures):
|
|
81
97
|
try:
|
|
@@ -21,7 +21,10 @@ import ee
|
|
|
21
21
|
import rasterio as rio
|
|
22
22
|
from rasterio.io import MemoryFile
|
|
23
23
|
import logging
|
|
24
|
+
from rasterio.merge import merge
|
|
25
|
+
from rasterio.enums import Resampling
|
|
24
26
|
import os
|
|
27
|
+
import shutil
|
|
25
28
|
|
|
26
29
|
os.environ['CPL_LOG_ERRORS'] = 'OFF'
|
|
27
30
|
logging.getLogger('rasterio._env').setLevel(logging.ERROR)
|
|
@@ -53,7 +56,7 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
|
|
|
53
56
|
blockxsize=256,
|
|
54
57
|
blockysize=256,
|
|
55
58
|
compress="ZSTD",
|
|
56
|
-
zstd_level=13,
|
|
59
|
+
# zstd_level=13,
|
|
57
60
|
predictor=2,
|
|
58
61
|
num_threads=20,
|
|
59
62
|
nodata=65535,
|
|
@@ -65,13 +68,12 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
|
|
|
65
68
|
with rio.open(full_outname, "w", **profile) as dst:
|
|
66
69
|
dst.write(src.read())
|
|
67
70
|
|
|
68
|
-
print(f"{full_outname} downloaded successfully.") # noqa: T201
|
|
69
|
-
|
|
70
|
-
|
|
71
71
|
def download_manifests(
|
|
72
72
|
manifests: List[Dict[str, Any]],
|
|
73
|
-
max_workers: int,
|
|
74
73
|
full_outname: pathlib.Path,
|
|
74
|
+
join: bool = True,
|
|
75
|
+
eraser: bool = True,
|
|
76
|
+
max_workers: int = 4,
|
|
75
77
|
) -> None:
|
|
76
78
|
"""Download every manifest in *manifests* concurrently.
|
|
77
79
|
|
|
@@ -93,3 +95,41 @@ def download_manifests(
|
|
|
93
95
|
fut.result()
|
|
94
96
|
except Exception as exc: # noqa: BLE001
|
|
95
97
|
print(f"Error en una de las descargas: {exc}") # noqa: T201
|
|
98
|
+
|
|
99
|
+
if join:
|
|
100
|
+
|
|
101
|
+
dir_path = full_outname.parent / full_outname.stem
|
|
102
|
+
input_files = sorted(dir_path.glob("*.tif"))
|
|
103
|
+
|
|
104
|
+
if dir_path.exists() and len(input_files) > 1:
|
|
105
|
+
|
|
106
|
+
with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
|
|
107
|
+
srcs = [rio.open(fp) for fp in input_files]
|
|
108
|
+
mosaic, out_transform = merge(
|
|
109
|
+
srcs,
|
|
110
|
+
nodata=65535,
|
|
111
|
+
resampling=Resampling.nearest
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
meta = srcs[0].profile.copy()
|
|
115
|
+
meta["transform"] = out_transform
|
|
116
|
+
meta.update(
|
|
117
|
+
height=mosaic.shape[1],
|
|
118
|
+
width=mosaic.shape[2]
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
with rio.open(full_outname, "w", **meta) as dst:
|
|
122
|
+
dst.write(mosaic)
|
|
123
|
+
|
|
124
|
+
for src in srcs:
|
|
125
|
+
src.close()
|
|
126
|
+
|
|
127
|
+
if eraser:
|
|
128
|
+
# Delete a folder with pathlib
|
|
129
|
+
shutil.rmtree(dir_path)
|
|
130
|
+
|
|
131
|
+
print("✅ Mosaico generado:", full_outname)
|
|
132
|
+
return full_outname
|
|
133
|
+
|
|
134
|
+
else:
|
|
135
|
+
return full_outname
|
|
@@ -482,7 +482,7 @@ class RequestSet(BaseModel):
|
|
|
482
482
|
str: A string representation of the entire RasterTransformSet.
|
|
483
483
|
"""
|
|
484
484
|
num_entries = len(self.requestset)
|
|
485
|
-
return f"
|
|
485
|
+
return f"RequestSet({num_entries} entries)"
|
|
486
486
|
|
|
487
487
|
def __str__(self):
|
|
488
488
|
return super().__repr__()
|
|
@@ -5,13 +5,15 @@ from __future__ import annotations
|
|
|
5
5
|
import ee
|
|
6
6
|
import pandas as pd
|
|
7
7
|
import pygeohash as pgh
|
|
8
|
-
from typing import List
|
|
9
8
|
|
|
10
9
|
from cubexpress.geotyping import Request, RequestSet
|
|
11
10
|
from cubexpress.conversion import lonlat2rt
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def table_to_requestset(
|
|
13
|
+
def table_to_requestset(
|
|
14
|
+
table: pd.DataFrame,
|
|
15
|
+
mosaic: bool = True
|
|
16
|
+
) -> RequestSet:
|
|
15
17
|
"""Return a :class:`RequestSet` built from *df* (cloud_table result).
|
|
16
18
|
|
|
17
19
|
Parameters
|
|
@@ -31,30 +33,30 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
|
|
|
31
33
|
"""
|
|
32
34
|
|
|
33
35
|
|
|
34
|
-
|
|
36
|
+
df = table.copy()
|
|
35
37
|
|
|
36
|
-
if
|
|
38
|
+
if df.empty:
|
|
37
39
|
raise ValueError("cloud_table returned no rows; nothing to request.")
|
|
38
40
|
|
|
39
41
|
rt = lonlat2rt(
|
|
40
|
-
lon=
|
|
41
|
-
lat=
|
|
42
|
-
edge_size=
|
|
43
|
-
scale=
|
|
42
|
+
lon=df.attrs["lon"],
|
|
43
|
+
lat=df.attrs["lat"],
|
|
44
|
+
edge_size=df.attrs["edge_size"],
|
|
45
|
+
scale=df.attrs["scale"],
|
|
44
46
|
)
|
|
45
|
-
centre_hash = pgh.encode(
|
|
47
|
+
centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
|
|
46
48
|
reqs: list[Request] = []
|
|
47
49
|
|
|
48
50
|
if mosaic:
|
|
49
51
|
# group all asset IDs per day
|
|
50
52
|
grouped = (
|
|
51
|
-
|
|
53
|
+
df.groupby("date")["id"] # Series con listas de ids por día
|
|
52
54
|
.apply(list)
|
|
53
55
|
)
|
|
54
56
|
|
|
55
57
|
for day, img_ids in grouped.items():
|
|
56
58
|
ee_img = ee.ImageCollection(
|
|
57
|
-
[ee.Image(f"{
|
|
59
|
+
[ee.Image(f"{df.attrs['collection']}/{img}") for img in img_ids]
|
|
58
60
|
).mosaic()
|
|
59
61
|
|
|
60
62
|
reqs.append(
|
|
@@ -62,11 +64,11 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
|
|
|
62
64
|
id=f"{day}_{centre_hash}",
|
|
63
65
|
raster_transform=rt,
|
|
64
66
|
image=ee_img,
|
|
65
|
-
bands=
|
|
67
|
+
bands=df.attrs["bands"],
|
|
66
68
|
)
|
|
67
69
|
)
|
|
68
70
|
else: # one request per asset
|
|
69
|
-
for _, row in
|
|
71
|
+
for _, row in df.iterrows():
|
|
70
72
|
img_id = row["id"]
|
|
71
73
|
day = row["date"]
|
|
72
74
|
|
|
@@ -74,8 +76,8 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
|
|
|
74
76
|
Request(
|
|
75
77
|
id=f"{day}_{centre_hash}_{img_id}",
|
|
76
78
|
raster_transform=rt,
|
|
77
|
-
image=f"{
|
|
78
|
-
bands=
|
|
79
|
+
image=f"{df.attrs['collection']}/{img_id}",
|
|
80
|
+
bands=df.attrs["bands"],
|
|
79
81
|
)
|
|
80
82
|
)
|
|
81
83
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|