cubexpress 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cubexpress might be problematic.
- {cubexpress-0.1.7 → cubexpress-0.1.9}/PKG-INFO +1 -1
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/__init__.py +2 -2
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/cloud_utils.py +87 -110
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/cube.py +25 -9
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/downloader.py +45 -5
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/geotyping.py +1 -1
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/request.py +23 -16
- {cubexpress-0.1.7 → cubexpress-0.1.9}/pyproject.toml +1 -1
- {cubexpress-0.1.7 → cubexpress-0.1.9}/LICENSE +0 -0
- {cubexpress-0.1.7 → cubexpress-0.1.9}/README.md +0 -0
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/cache.py +0 -0
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/conversion.py +0 -0
- {cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/geospatial.py +0 -0
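Taken together, the hunks below rename `cloud_table` to `s2_cloud_table`, hard-code the `COPERNICUS/S2_HARMONIZED` collection at 10 m scale, and let `get_cube` consume the cloud table directly. A minimal sketch of the resulting 0.1.9 workflow, assuming an authenticated `earthengine-api` session; coordinates, dates, and thresholds are illustrative:

```python
# Sketch of the 0.1.9 API implied by this diff; assumes an
# authenticated earthengine-api session. All argument values below
# are illustrative, not taken from the package.
import ee
import cubexpress

ee.Initialize()

# s2_cloud_table replaces cloud_table and hard-codes the
# COPERNICUS/S2_HARMONIZED collection at 10 m scale.
table = cubexpress.s2_cloud_table(
    lon=-76.9,          # illustrative point
    lat=-12.0,
    edge_size=512,      # square side in 10 m Sentinel-2 pixels
    start="2024-01-01",
    end="2024-03-01",
    min_cscore=0.8,     # keep images with cs_cdf in [min_cscore, max_cscore]
)

# get_cube now takes the table itself and builds the RequestSet internally.
cubexpress.get_cube(table, outfolder="s2_cube")
```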
{cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/__init__.py
@@ -1,6 +1,6 @@
 from cubexpress.conversion import lonlat2rt, geo2utm
 from cubexpress.geotyping import RasterTransform, Request, RequestSet
-from cubexpress.cloud_utils import cloud_table
+from cubexpress.cloud_utils import s2_cloud_table
 from cubexpress.cube import get_cube
 from cubexpress.request import table_to_requestset
 
@@ -15,7 +15,7 @@ __all__ = [
     "RequestSet",
     "geo2utm",
     "get_cube",
-    "cloud_table",
+    "s2_cloud_table",
     "table_to_requestset"
 ]
 
{cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/cloud_utils.py
@@ -13,10 +13,6 @@ Both return a ``pandas.DataFrame`` with the columns **day**, **cloudPct** and
 from __future__ import annotations
 
 import datetime as dt
-import json
-import pathlib
-from typing import List, Optional
-
 import ee
 import pandas as pd
 
@@ -28,102 +24,97 @@ def _cloud_table_single_range(
     lon: float,
     lat: float,
     edge_size: int,
-    scale: int,
     start: str,
-    end: str
-    collection: str = "COPERNICUS/S2_HARMONIZED",
+    end: str
 ) -> pd.DataFrame:
-    """
+    """
+    Build a daily cloud-score table for a square Sentinel-2 footprint.
 
     Parameters
     ----------
-    lon, lat
-
-    edge_size
-
-
-
-        ISO-dates (``YYYY-MM-DD``) delimiting the query.
-    collection
-        Sentinel-2 collection name to query.
+    lon, lat : float
+        Point at the centre of the requested region (°).
+    edge_size : int
+        Side length of the square region in Sentinel-2 pixels (10 m each).
+    start, end : str
+        ISO-8601 dates delimiting the period, e.g. ``"2024-06-01"``.
 
     Returns
     -------
     pandas.DataFrame
-
-
+        One row per image with columns:
+        * ``id`` – Sentinel-2 ID
+        * ``cs_cdf`` – Cloud Score Plus CDF (0–1)
+        * ``date`` – acquisition date (YYYY-MM-DD)
+        * ``high_null_flag`` – 1 if cloud score missing
+
+    Notes
+    -----
+    Missing ``cs_cdf`` values are filled with the mean of the same day.
     """
-    roi = _square_roi(lon, lat, edge_size, scale)
-    s2 = ee.ImageCollection(collection)
-
-    if collection in (
-        "COPERNICUS/S2_HARMONIZED",
-        "COPERNICUS/S2_SR_HARMONIZED",
-    ):
-        qa_band = "cs_cdf"
-        csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
-    else:
-        qa_band, csp = None, None
-
-    def _add_props(img):
-        day = ee.Date(img.get("system:time_start")).format("YYYY-MM-dd")
-        imgid = img.get("system:index")
-
-        if qa_band:
-            score = (
-                img.linkCollection(csp, [qa_band])
-                .select([qa_band])
-                .reduceRegion(ee.Reducer.mean(), roi, scale)
-                .get(qa_band)
-            )
-            # If score is null assume completely clear (score=1 → cloudPct=0)
-            score_safe = ee.Algorithms.If(score, score, -1)
-            cloud_pct = (
-                ee.Number(1)
-                .subtract(ee.Number(score_safe))
-                .multiply(10000)
-                .round()
-                .divide(100)
-            )
-        else:
-            cloud_pct = ee.Number(-1)
-
-        return ee.Feature(
-            None,
-            {
-                "day": day,
-                "cloudPct": cloud_pct,
-                "images": imgid,
-            },
-        )
 
-
-
+    center = ee.Geometry.Point([lon, lat])
+    roi = _square_roi(lon, lat, edge_size, 10)
+
+    s2 = (
+        ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
         .filterBounds(roi)
-        .
-
-
-
+        .filterDate(start, end)
+    )
+
+    csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
+
+    ic = (
+        s2
+        .linkCollection(csp, ["cs_cdf"])
+        .select(["cs_cdf"])
     )
 
-
-
-
+    # image IDs for every expected date
+    ids = ic.aggregate_array("system:index").getInfo()
+    df_ids = pd.DataFrame({"id": ids})
+
+
+    region_scale = edge_size * 10 / 2
+
+
+    try:
+        raw = ic.getRegion(geometry=center, scale=region_scale).getInfo()
+    except ee.ee_exception.EEException as e:
+        if "No bands in collection" in str(e):
+            return pd.DataFrame(
+                columns=["id", "cs_cdf", "date", "high_null_flag"]
+            )
+        raise
+
+    df_raw = pd.DataFrame(raw[1:], columns=raw[0])
+
+
+    df = (
+        df_ids
+        .merge(df_raw, on="id", how="left")
+        .assign(
+            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d"),
+            high_null_flag=lambda d: d["cs_cdf"].isna().astype(int),
+        )
+        .drop(columns=["longitude", "latitude", "time"])
+    )
+
+    # fill missing scores with daily mean
+    df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
+
     return df
 
 
-def cloud_table(
+def s2_cloud_table(
     lon: float,
     lat: float,
-    edge_size: int
-
-
-
-
-
-    collection: str = "COPERNICUS/S2_HARMONIZED",
-    output_path: str | pathlib.Path | None = None,
-    cache: bool = True,
+    edge_size: int,
+    start: str,
+    end: str,
+    max_cscore: float = 1.0,
+    min_cscore: float = 0.0,
+    cache: bool = False,
     verbose: bool = True,
 ) -> pd.DataFrame:
     """Build (and cache) a per-day cloud-table for the requested ROI.
@@ -161,23 +152,10 @@ def cloud_table(
     pandas.DataFrame
         Filtered cloud table with ``.attrs`` containing the call parameters.
     """
-    if bands is None:
-        bands = [
-            "B1",
-            "B2",
-            "B3",
-            "B4",
-            "B5",
-            "B6",
-            "B7",
-            "B8",
-            "B8A",
-            "B9",
-            "B10",
-            "B11",
-            "B12",
-        ]
 
+    bands = ["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B9", "B10", "B11", "B12"]
+    collection = "COPERNICUS/S2_HARMONIZED"
+    scale = 10
     cache_file = _cache_key(lon, lat, edge_size, scale, collection)
 
     # ─── 1. Load cached data if present ────────────────────────────────────
@@ -185,7 +163,7 @@ def cloud_table(
         if verbose:
             print("📂 Loading cached table …")
         df_cached = pd.read_parquet(cache_file)
-        have_idx = pd.to_datetime(df_cached["
+        have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()
 
         cached_start = have_idx.min().date()
         cached_end = have_idx.max().date()
@@ -204,39 +182,40 @@ def cloud_table(
             a1, b1 = start, cached_start.isoformat()
             df_new_parts.append(
                 _cloud_table_single_range(
-                    lon, lat, edge_size,
+                    lon, lat, edge_size, a1, b1
                 )
             )
         if dt.date.fromisoformat(end) > cached_end:
             a2, b2 = cached_end.isoformat(), end
             df_new_parts.append(
                 _cloud_table_single_range(
-                    lon, lat, edge_size,
+                    lon, lat, edge_size, a2, b2
                 )
             )
         df_new = pd.concat(df_new_parts, ignore_index=True)
         df_full = (
             pd.concat([df_cached, df_new], ignore_index=True)
-            .
-            .sort_values("day", kind="mergesort")
+            .sort_values("date", kind="mergesort")
         )
     else:
-
+
         if verbose:
             msg = "Generating table (no cache found)…" if cache else "Generating table…"
             print("⏳", msg)
         df_full = _cloud_table_single_range(
-            lon, lat, edge_size,
+            lon, lat, edge_size, start, end
         )
+
 
     # ─── 2. Save cache ─────────────────────────────────────────────────────
     if cache:
         df_full.to_parquet(cache_file, compression="zstd")
 
     # ─── 3. Filter by cloud cover and requested date window ────────────────
+
     result = (
-        df_full.query("@start <=
-        .query("
+        df_full.query("@start <= date <= @end")
+        .query("@min_cscore <= cs_cdf <= @max_cscore")
        .reset_index(drop=True)
     )
 
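The caching branch above only fetches the date ranges that fall outside the cached window. The same rule in isolation, with made-up dates:

```python
import datetime as dt

# Hypothetical request window vs. cached window.
start, end = "2024-01-01", "2024-06-01"
cached_start, cached_end = dt.date(2024, 2, 1), dt.date(2024, 4, 1)

missing = []
if dt.date.fromisoformat(start) < cached_start:
    missing.append((start, cached_start.isoformat()))   # head gap
if dt.date.fromisoformat(end) > cached_end:
    missing.append((cached_end.isoformat(), end))       # tail gap
print(missing)
# [('2024-01-01', '2024-02-01'), ('2024-04-01', '2024-06-01')]
```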
@@ -248,9 +227,7 @@ def cloud_table(
             "edge_size": edge_size,
             "scale": scale,
             "bands": bands,
-            "collection": collection
-            "cloud_max": cloud_max,
-            "output_path": str(output_path) if output_path else "",
+            "collection": collection
         }
     )
-    return result
+    return result
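Downstream code relies on the `.attrs` metadata written in the hunk above. A sketch of what a 0.1.9 table should expose; the keys are taken from the diff, the printed values are assumptions:

```python
# `table` as returned by s2_cloud_table; see the quick-start sketch above.
print(table.columns.tolist())
# ['id', 'cs_cdf', 'date', 'high_null_flag']

print(table.attrs["collection"])  # 'COPERNICUS/S2_HARMONIZED', now hard-coded
print(table.attrs["scale"])       # 10, now hard-coded
print(table.attrs["bands"])       # fixed 'B1'..'B12' list; no longer a parameter
```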
{cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/cube.py
@@ -16,17 +16,22 @@ from __future__ import annotations
 import pathlib
 import concurrent.futures
 from typing import Dict, Any
-
 import ee
+
+
 from cubexpress.downloader import download_manifest, download_manifests
 from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
-from cubexpress.
+from cubexpress.request import table_to_requestset
+import pandas as pd
 
 
 def get_geotiff(
     manifest: Dict[str, Any],
     full_outname: pathlib.Path | str,
+    join: bool = True,
+    eraser: bool = True,
     nworks: int = 4,
+    verbose: bool = True,
 ) -> None:
     """Download *manifest* to *full_outname*, retrying with tiled requests.
 
@@ -39,19 +44,26 @@ def get_geotiff(
     nworks
         Maximum worker threads when the image must be split; default **4**.
     """
+    full_outname = pathlib.Path(full_outname)
     try:
-        download_manifest(manifest,
+        download_manifest(manifest, full_outname)
     except ee.ee_exception.EEException as err:
-
+
         size = manifest["grid"]["dimensions"]["width"]  # square images assumed
         cell_w, cell_h, power = calculate_cell_size(str(err), size)
         tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
-        download_manifests(tiled,
+        download_manifests(tiled, full_outname, join, eraser, nworks)
+
+    if verbose:
+        print(f"Downloaded {full_outname}")
 
 
 def get_cube(
-
+    table: pd.DataFrame,
     outfolder: pathlib.Path | str,
+    join: bool = True,
+    eraser: bool = True,
+    mosaic: bool = True,
     nworks: int = 4,
 ) -> None:
     """Download every request in *requests* to *outfolder* using a thread pool.
@@ -68,14 +80,18 @@ def get_cube(
     nworks
         Pool size for concurrent downloads; default **4**.
     """
-
+
+    requests = table_to_requestset(
+        table=table,
+        mosaic=mosaic
+    )
 
     with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
         futures = []
         for _, row in requests._dataframe.iterrows():
-            outname =
+            outname = pathlib.Path(outfolder) / f"{row.id}.tif"
             outname.parent.mkdir(parents=True, exist_ok=True)
-            futures.append(pool.submit(get_geotiff, row.manifest, outname, nworks))
+            futures.append(pool.submit(get_geotiff, row.manifest, outname, join, eraser, nworks))
 
         for fut in concurrent.futures.as_completed(futures):
             try:
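`get_cube` now absorbs the `table_to_requestset` call and threads `join` and `eraser` through to `get_geotiff`. A hedged sketch of a per-scene (non-mosaic) download using the flag names from the diff:

```python
# One GeoTIFF per Sentinel-2 asset instead of one mosaic per day.
cubexpress.get_cube(
    table,                  # DataFrame from s2_cloud_table
    outfolder="s2_scenes",  # illustrative path
    mosaic=False,           # one request per asset id
    join=True,              # merge quad-split tiles back into a single file
    eraser=True,            # drop the per-tile folder after merging
    nworks=8,               # thread-pool size
)
```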
{cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/downloader.py
@@ -21,7 +21,10 @@ import ee
 import rasterio as rio
 from rasterio.io import MemoryFile
 import logging
+from rasterio.merge import merge
+from rasterio.enums import Resampling
 import os
+import shutil
 
 os.environ['CPL_LOG_ERRORS'] = 'OFF'
 logging.getLogger('rasterio._env').setLevel(logging.ERROR)
@@ -53,7 +56,7 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
         blockxsize=256,
         blockysize=256,
         compress="ZSTD",
-        zstd_level=13,
+        # zstd_level=13,
         predictor=2,
         num_threads=20,
         nodata=65535,
@@ -65,13 +68,12 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
     with rio.open(full_outname, "w", **profile) as dst:
         dst.write(src.read())
 
-    print(f"{full_outname} downloaded successfully.")  # noqa: T201
-
-
 def download_manifests(
     manifests: List[Dict[str, Any]],
-    max_workers: int,
     full_outname: pathlib.Path,
+    join: bool = True,
+    eraser: bool = True,
+    max_workers: int = 4,
 ) -> None:
     """Download every manifest in *manifests* concurrently.
 
@@ -93,3 +95,41 @@ def download_manifests(
                 fut.result()
             except Exception as exc:  # noqa: BLE001
                 print(f"Error in one of the downloads: {exc}")  # noqa: T201
+
+    if join:
+
+        dir_path = full_outname.parent / full_outname.stem
+        input_files = sorted(dir_path.glob("*.tif"))
+
+        if dir_path.exists() and len(input_files) > 1:
+
+            with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
+                srcs = [rio.open(fp) for fp in input_files]
+                mosaic, out_transform = merge(
+                    srcs,
+                    nodata=65535,
+                    resampling=Resampling.nearest
+                )
+
+                meta = srcs[0].profile.copy()
+                meta["transform"] = out_transform
+                meta.update(
+                    height=mosaic.shape[1],
+                    width=mosaic.shape[2]
+                )
+
+                with rio.open(full_outname, "w", **meta) as dst:
+                    dst.write(mosaic)
+
+                for src in srcs:
+                    src.close()
+
+            if eraser:
+                # delete the per-tile folder with shutil
+                shutil.rmtree(dir_path)
+
+            print("✅ Mosaic generated:", full_outname)
+            return full_outname
+
+        else:
+            return full_outname
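The new `join` step is standard rasterio mosaicking over a tile folder named after the target file. A self-contained sketch of the same pattern under that assumption; `cube.tif` and its tile folder are hypothetical paths:

```python
import pathlib
import shutil

import rasterio as rio
from rasterio.enums import Resampling
from rasterio.merge import merge

out = pathlib.Path("cube.tif")        # hypothetical target file
tile_dir = out.parent / out.stem      # tiles live in ./cube/
tiles = sorted(tile_dir.glob("*.tif"))

if len(tiles) > 1:
    srcs = [rio.open(fp) for fp in tiles]
    # Merge the tiles into one array plus its georeferencing transform.
    mosaic, transform = merge(srcs, nodata=65535, resampling=Resampling.nearest)

    meta = srcs[0].profile.copy()
    meta.update(transform=transform, height=mosaic.shape[1], width=mosaic.shape[2])

    with rio.open(out, "w", **meta) as dst:  # write the merged raster
        dst.write(mosaic)
    for src in srcs:
        src.close()

    shutil.rmtree(tile_dir)               # the eraser=True behaviour
```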
{cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/geotyping.py
@@ -482,7 +482,7 @@ class RequestSet(BaseModel):
             str: A string representation of the entire RasterTransformSet.
         """
         num_entries = len(self.requestset)
-        return f"
+        return f"RequestSet({num_entries} entries)"
 
     def __str__(self):
         return super().__repr__()
{cubexpress-0.1.7 → cubexpress-0.1.9}/cubexpress/request.py
@@ -5,13 +5,15 @@ from __future__ import annotations
 import ee
 import pandas as pd
 import pygeohash as pgh
-from typing import List
 
 from cubexpress.geotyping import Request, RequestSet
 from cubexpress.conversion import lonlat2rt
 
 
-def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
+def table_to_requestset(
+    table: pd.DataFrame,
+    mosaic: bool = True
+) -> RequestSet:
     """Return a :class:`RequestSet` built from *df* (cloud_table result).
 
     Parameters
@@ -29,6 +31,10 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
         If *df* is empty after filtering.
 
     """
+
+
+    df = table.copy()
+
     if df.empty:
         raise ValueError("cloud_table returned no rows; nothing to request.")
 
@@ -39,24 +45,23 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
         scale=df.attrs["scale"],
     )
     centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
-    reqs:
+    reqs: list[Request] = []
 
     if mosaic:
         # group all asset IDs per day
         grouped = (
-            df.
-
-            .groupby("day")["img"]
-            .apply(list)
+            df.groupby("date")["id"]  # Series with the lists of ids per day
+            .apply(list)
         )
 
         for day, img_ids in grouped.items():
             ee_img = ee.ImageCollection(
                 [ee.Image(f"{df.attrs['collection']}/{img}") for img in img_ids]
             ).mosaic()
+
             reqs.append(
                 Request(
-                    id=f"{day}_{centre_hash}
+                    id=f"{day}_{centre_hash}",
                     raster_transform=rt,
                     image=ee_img,
                     bands=df.attrs["bands"],
@@ -64,14 +69,16 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
             )
     else:  # one request per asset
         for _, row in df.iterrows():
-
-
-
-
-
-
-
-
+            img_id = row["id"]
+            day = row["date"]
+
+            reqs.append(
+                Request(
+                    id=f"{day}_{centre_hash}_{img_id}",
+                    raster_transform=rt,
+                    image=f"{df.attrs['collection']}/{img_id}",
+                    bands=df.attrs["bands"],
                 )
+            )
 
     return RequestSet(requestset=reqs)
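Callers that want the intermediate object rather than files on disk can exercise the new signature directly; `table` is again an `s2_cloud_table` result:

```python
from cubexpress import table_to_requestset

requests = table_to_requestset(table=table, mosaic=True)  # one request per day
print(requests)  # e.g. RequestSet(12 entries), per the repr fix in geotyping.py
```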
Files without changes: LICENSE, README.md, cubexpress/cache.py, cubexpress/conversion.py, cubexpress/geospatial.py.