cubexpress 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cubexpress might be problematic.
- cubexpress/__init__.py +3 -9
- cubexpress/cloud_utils.py +70 -59
- cubexpress/cube.py +36 -62
- cubexpress/downloader.py +53 -61
- cubexpress/geospatial.py +0 -2
- cubexpress/geotyping.py +1 -15
- cubexpress/request.py +10 -13
- {cubexpress-0.1.11.dist-info → cubexpress-0.1.13.dist-info}/METADATA +2 -2
- cubexpress-0.1.13.dist-info/RECORD +13 -0
- cubexpress-0.1.11.dist-info/RECORD +0 -13
- {cubexpress-0.1.11.dist-info → cubexpress-0.1.13.dist-info}/LICENSE +0 -0
- {cubexpress-0.1.11.dist-info → cubexpress-0.1.13.dist-info}/WHEEL +0 -0
cubexpress/__init__.py
CHANGED
@@ -1,13 +1,11 @@
 from cubexpress.conversion import lonlat2rt, geo2utm
 from cubexpress.geotyping import RasterTransform, Request, RequestSet, GeotransformDict
-from cubexpress.cloud_utils import s2_cloud_table
+from cubexpress.cloud_utils import s2_table
 from cubexpress.cube import get_cube
 from cubexpress.request import table_to_requestset
+import importlib.metadata


-
-# pyproj
-# Export the functions
 __all__ = [
     "lonlat2rt",
     "RasterTransform",
@@ -16,11 +14,7 @@ __all__ = [
     "RequestSet",
     "geo2utm",
     "get_cube",
-    "s2_cloud_table",
+    "s2_table",
     "table_to_requestset"
 ]
-
-# # Dynamic version import
-# import importlib.metadata
-
 # __version__ = importlib.metadata.version("cubexpress")

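Taken together, the 0.1.13 surface reads: s2_table builds the per-scene cloud table, table_to_requestset turns it into a RequestSet, and get_cube downloads it. A minimal usage sketch against the new signatures — coordinates, dates, thresholds and the output folder are illustrative placeholders, and an authenticated Earth Engine session is assumed:

import ee
import cubexpress

ee.Initialize()  # assumes prior `earthengine authenticate`

# Per-day cloud table for a 10 m Sentinel-2 ROI (all values are placeholders)
table = cubexpress.s2_table(
    lon=-76.5, lat=-9.5,
    edge_size=512,
    start="2023-01-01", end="2023-03-31",
    min_cscore=0.6, max_cscore=1.0,
)

requests = cubexpress.table_to_requestset(table, mosaic=True)
cubexpress.get_cube(requests, outfolder="s2_cube", nworks=4)
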
cubexpress/cloud_utils.py
CHANGED
@@ -15,9 +15,11 @@ from __future__ import annotations
 import datetime as dt
 import ee
 import pandas as pd
-
 from cubexpress.cache import _cache_key
+import datetime as dt
 from cubexpress.geospatial import _square_roi
+import warnings
+warnings.filterwarnings('ignore', category=DeprecationWarning)


 def _cloud_table_single_range(
@@ -55,61 +57,68 @@ def _cloud_table_single_range(

     center = ee.Geometry.Point([lon, lat])
     roi = _square_roi(lon, lat, edge_size, 10)
-
+
     s2 = (
-        ee.ImageCollection("COPERNICUS/
+        ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
         .filterBounds(roi)
         .filterDate(start, end)
     )

-    csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
-
     ic = (
         s2
-        .linkCollection(
+        .linkCollection(
+            ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED"),
+            ["cs_cdf"]
+        )
         .select(["cs_cdf"])
     )
-
-
-
-
-
-
-
-
-
+
+    ids_inside = (
+        ic
+        .map(
+            lambda img: img.set(
+                'roi_inside_scene',
+                img.geometry().contains(roi, maxError=10)
+            )
+        )
+        .filter(ee.Filter.eq('roi_inside_scene', True))
+        .aggregate_array('system:index')
+        .getInfo()
+    )
+
     try:
-        raw = ic.getRegion(
+        raw = ic.getRegion(
+            geometry=center,
+            scale=(edge_size) * 11
+        ).getInfo()
     except ee.ee_exception.EEException as e:
         if "No bands in collection" in str(e):
             return pd.DataFrame(
-                columns=["id", "
+                columns=["id", "longitude", "latitude", "time", "cs_cdf", "inside"]
             )
-        raise
-
-    df_raw =
-
-
-    df = (
-        df_ids
-        .merge(df_raw, on="id", how="left")
+        raise e
+
+    df_raw = (
+        pd.DataFrame(raw[1:], columns=raw[0])
+        .drop(columns=["longitude", "latitude"])
         .assign(
-            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d")
-            null_flag=lambda d: d["cs_cdf"].isna().astype(int),
+            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d")
         )
-        .drop(columns=["longitude", "latitude", "time"])
     )
+
+    df_raw["inside"] = df_raw["id"].isin(set(ids_inside)).astype(int)
+
+    df_raw['cs_cdf'] = df_raw.groupby('date').apply(
+        lambda group: group['cs_cdf'].transform(
+            lambda _: group[group['inside'] == 1]['cs_cdf'].iloc[0]
+            if (group['inside'] == 1).any()
+            else group['cs_cdf'].mean()
+        )
+    ).reset_index(drop=True)

-
-
-
-    df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
-
-
-    return df
-
-
-def s2_cloud_table(
+    return df_raw
+
+def s2_table(
     lon: float,
     lat: float,
     edge_size: int,
@@ -117,8 +126,7 @@ def s2_cloud_table(
     end: str,
     max_cscore: float = 1.0,
     min_cscore: float = 0.0,
-    cache: bool = False
-    verbose: bool = True,
+    cache: bool = False
 ) -> pd.DataFrame:
     """Build (and cache) a per-day cloud-table for the requested ROI.

@@ -147,9 +155,7 @@ def s2_cloud_table(
         Downstream path hint stored in ``result.attrs``; not used internally.
     cache
         Toggle parquet caching.
-
-        If *True* prints cache info/progress.
-
+
     Returns
     -------
     pandas.DataFrame
@@ -161,10 +167,9 @@ def s2_cloud_table(
     scale = 10
     cache_file = _cache_key(lon, lat, edge_size, scale, collection)

-    #
+    # Load cached data if present
     if cache and cache_file.exists():
-
-        print("📂 Loading cached metadata …")
+        print("📂 Loading cached metadata …")
         df_cached = pd.read_parquet(cache_file)
         have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()

@@ -175,8 +180,7 @@ def s2_cloud_table(
             dt.date.fromisoformat(start) >= cached_start
             and dt.date.fromisoformat(end) <= cached_end
         ):
-
-            print("✅ Served entirely from metadata.")
+            print("✅ Served entirely from metadata.")
             df_full = df_cached
         else:
             # Identify missing segments and fetch only those.
@@ -185,14 +189,22 @@ def s2_cloud_table(
                 a1, b1 = start, cached_start.isoformat()
                 df_new_parts.append(
                     _cloud_table_single_range(
-                        lon,
+                        lon=lon,
+                        lat=lat,
+                        edge_size=edge_size,
+                        start=a1,
+                        end=b1
                     )
                 )
             if dt.date.fromisoformat(end) > cached_end:
                 a2, b2 = cached_end.isoformat(), end
                 df_new_parts.append(
                     _cloud_table_single_range(
-                        lon,
+                        lon=lon,
+                        lat=lat,
+                        edge_size=edge_size,
+                        start=a2,
+                        end=b2
                     )
                 )
             df_new_parts = [df for df in df_new_parts if not df.empty]
@@ -207,21 +219,20 @@ def s2_cloud_table(
         else:
             df_full = df_cached
     else:
-
-        if verbose:
-            msg = "Generating metadata (no cache found)…" if cache else "Generating metadata…"
-            print("⏳", msg)
+        print("⏳ Generating metadata…")
         df_full = _cloud_table_single_range(
-            lon,
+            lon=lon,
+            lat=lat,
+            edge_size=edge_size,
+            start=start,
+            end=end
         )
-

-    #
+    # Save cache
    if cache:
        df_full.to_parquet(cache_file, compression="zstd")

-    #
-
+    # Filter by cloud cover and requested date window
    result = (
        df_full.query("@start <= date <= @end")
        .query("@min_cscore <= cs_cdf <= @max_cscore")

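The reworked cs_cdf logic replaces the old null_flag/per-date-mean fill: for each date, if any scene fully contains the ROI (inside == 1) that scene's score is used for the whole day, otherwise the day's mean is used. A standalone pandas sketch with toy scores mirrors the groupby/apply from the diff; note this pattern emits a DeprecationWarning on pandas >= 2.2, which is presumably what the new module-level warnings filter silences:

import pandas as pd

df = pd.DataFrame({
    "id":     ["20230101_A", "20230101_B", "20230102_C", "20230102_D"],
    "date":   ["2023-01-01", "2023-01-01", "2023-01-02", "2023-01-02"],
    "cs_cdf": [0.90, 0.40, 0.70, 0.50],
    "inside": [1, 0, 0, 0],
})

df["cs_cdf"] = df.groupby("date").apply(
    lambda g: g["cs_cdf"].transform(
        lambda _: g[g["inside"] == 1]["cs_cdf"].iloc[0]   # a scene covers the ROI
        if (g["inside"] == 1).any()
        else g["cs_cdf"].mean()                           # fall back to the day mean
    )
).reset_index(drop=True)

print(df["cs_cdf"].tolist())  # [0.9, 0.9, 0.6, 0.6]
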
cubexpress/cube.py
CHANGED
@@ -14,9 +14,10 @@ The core download/split logic lives in *cubexpress.downloader* and
 from __future__ import annotations

 import pathlib
-
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Dict, Any
 import ee
+from tqdm import tqdm


 from cubexpress.downloader import download_manifest, download_manifests
@@ -29,9 +30,7 @@ from cubexpress.geotyping import RequestSet
 def get_geotiff(
     manifest: Dict[str, Any],
     full_outname: pathlib.Path | str,
-
-    nworks: int = 4,
-    verbose: bool = True,
+    nworks: int = 4
 ) -> None:
     """Download *manifest* to *full_outname*, retrying with tiled requests.

@@ -44,34 +43,27 @@ def get_geotiff(
     nworks
         Maximum worker threads when the image must be split; default **4**.
     """
-
+
     try:
-        download_manifest(
+        download_manifest(
+            ulist=manifest,
+            full_outname=full_outname
+        )
     except ee.ee_exception.EEException as err:
-
-        size = manifest["grid"]["dimensions"]["width"]  # square images assumed
+        size = manifest["grid"]["dimensions"]["width"]
         cell_w, cell_h, power = calculate_cell_size(str(err), size)
         tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
+
         download_manifests(
-            manifests
-            full_outname
-
-            max_workers = nworks
+            manifests=tiled,
+            full_outname=full_outname,
+            max_workers=nworks
         )

-    if verbose:
-        print(f"Downloaded {full_outname}")
-
-
 def get_cube(
-    # table: pd.DataFrame,
     requests: pd.DataFrame | RequestSet,
     outfolder: pathlib.Path | str,
-
-    join: bool = True,
-    nworks: int = 4,
-    verbose: bool = True,
-    cache: bool = True
+    nworks: int = 4
 ) -> None:
     """Download every request in *requests* to *outfolder* using a thread pool.

@@ -87,46 +79,28 @@ def get_cube(
     nworks
         Pool size for concurrent downloads; default **4**.
     """
-
-    # requests = table_to_requestset(
-    #     table=table,
-    #     mosaic=mosaic
-    # )

     outfolder = pathlib.Path(outfolder).expanduser().resolve()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    for fut in concurrent.futures.as_completed(futures):
+    outfolder.mkdir(parents=True, exist_ok=True)
+    dataframe = requests._dataframe if isinstance(requests, RequestSet) else requests
+
+    with ThreadPoolExecutor(max_workers=nworks) as executor:
+        futures = {
+            executor.submit(
+                get_geotiff,
+                manifest=row.manifest,
+                full_outname=pathlib.Path(outfolder) / f"{row.id}.tif",
+                nworks=nworks
+            ): row.id for _, row in dataframe.iterrows()
+        }
+        for future in tqdm(
+            as_completed(futures),
+            total=len(futures),
+            desc="Downloading images",
+            unit="image",
+            leave=True
+        ):
             try:
-
-        except Exception as exc:
-            print(f"Download error: {exc}")
-
-    # download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
-    # download_df["outname"] = outfolder / requests._dataframe["outname"]
-    # download_df.rename(columns={"outname": "full_outname"}, inplace=True)
-
-    return
-
-    # manifest = row.manifest
-    # full_outname = outname
-    # join: bool = True,
-    # nworks: int = 4,
-    # verbose: bool = True,
+                future.result()
+            except Exception as exc:
+                print(f"Download error for {futures[future]}: {exc}")

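The rewritten get_cube keeps a futures -> request-id mapping so a failure can be attributed to the request that raised it, and drives the loop through tqdm for progress. The pattern in isolation — work() and the integer items are placeholders for get_geotiff and the request rows:

from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

def work(item: int) -> int:
    time.sleep(0.1)   # stand-in for one GeoTIFF download
    return item * item

with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(work, i): i for i in range(8)}
    for future in tqdm(as_completed(futures), total=len(futures),
                       desc="Downloading images", unit="image"):
        try:
            future.result()
        except Exception as exc:
            print(f"Download error for {futures[future]}: {exc}")
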
cubexpress/downloader.py
CHANGED
@@ -13,16 +13,14 @@ from __future__ import annotations

 import json
 import pathlib
-
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any, Dict

 import ee
 import rasterio as rio
 from rasterio.io import MemoryFile
 import logging
-from rasterio.merge import merge
-from rasterio.enums import Resampling
 import os
 import shutil
 import tempfile
@@ -31,7 +29,10 @@ from cubexpress.geospatial import merge_tifs
 os.environ['CPL_LOG_ERRORS'] = 'OFF'
 logging.getLogger('rasterio._env').setLevel(logging.ERROR)

-def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None:
+def download_manifest(
+    ulist: Dict[str, Any],
+    full_outname: pathlib.Path
+) -> None:
     """Download *ulist* and save it as *full_outname*.

     The manifest must include either an ``assetId`` or an ``expression``
@@ -45,36 +46,38 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
         ulist_deep = deepcopy(ulist)
         ulist_deep["expression"] = ee_image
         images_bytes = ee.data.computePixels(ulist_deep)
-    else:
+    else:
         raise ValueError("Manifest does not contain 'assetId' or 'expression'")
-
-    with
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    with open(full_outname, "wb") as src:
+        src.write(images_bytes)
+
+    # with MemoryFile(images_bytes) as memfile:
+    #     with memfile.open() as src:
+    #         profile = src.profile
+    #         profile.update(
+    #             driver="GTiff",
+    #             tiled=True,
+    #             interleave="band",
+    #             blockxsize=256,
+    #             blockysize=256,
+    #             compress="ZSTD",
+    #             zstd_level=13,
+    #             predictor=2,
+    #             num_threads=20,
+    #             nodata=65535,
+    #             dtype="uint16",
+    #             count=12,
+    #             photometric="MINISBLACK"
+    #         )
+
+    #     with rio.open(full_outname, "w", **profile) as dst:
+    #         dst.write(src.read())

 def download_manifests(
     manifests: list[Dict[str, Any]],
     full_outname: pathlib.Path,
-
-    max_workers: int = 4,
+    max_workers: int,
 ) -> None:
     """Download every manifest in *manifests* concurrently.

@@ -82,38 +85,27 @@ def download_manifests(
     ``full_outname.parent/full_outname.stem`` with names ``000000.tif``,
     ``000001.tif`` … according to the list order.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-        futures.append(
-            executor.submit(
-                download_manifest,
-                umanifest,  # ulist = umanifest
-                outname  # full_outname = outname
-            )
-        )
-
-    for fut in concurrent.futures.as_completed(futures):
+    tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="cubexpress_"))
+    full_outname_temp = tmp_dir / full_outname.stem
+    full_outname_temp.mkdir(parents=True, exist_ok=True)
+
+    with ThreadPoolExecutor(max_workers=max_workers) as exe:
+        futures = {
+            exe.submit(
+                download_manifest,
+                ulist=umanifest,
+                full_outname=full_outname_temp / f"{index:06d}.tif"
+            ): umanifest for index, umanifest in enumerate(manifests)
+        }
+        for future in as_completed(futures):
            try:
-
-        except Exception as exc:
-            print(f"Error
-
-
-
-        if dir_path.exists():
-            input_files = sorted(dir_path.glob("*.tif"))
+                future.result()
+            except Exception as exc:
+                print(f"Error in one of the downloads: {exc}")
+
+    if full_outname_temp.exists():
+        input_files = sorted(full_outname_temp.glob("*.tif"))
         merge_tifs(input_files, full_outname)
-        shutil.rmtree(
+        shutil.rmtree(full_outname_temp)
     else:
         raise ValueError(f"Error in {full_outname}")

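download_manifests now stages tiles in a throwaway temp directory as zero-padded 000000.tif, 000001.tif, … so that a lexical sort restores manifest order before the merge, then removes the staging area. A self-contained sketch of that bookkeeping, with fetch_tile() and a byte-concatenation merge standing in for ee.data.computePixels and cubexpress.geospatial.merge_tifs:

import pathlib
import shutil
import tempfile

def fetch_tile(data: bytes, outname: pathlib.Path) -> None:
    outname.write_bytes(data)   # placeholder for the real download

def merge_tifs(files, outname: pathlib.Path) -> None:
    outname.write_bytes(b"".join(f.read_bytes() for f in files))  # placeholder merge

final = pathlib.Path("scene.tif")
tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="cubexpress_"))
tile_dir = tmp_dir / final.stem
tile_dir.mkdir(parents=True, exist_ok=True)

for index, tile in enumerate([b"tile0", b"tile1", b"tile2"]):
    fetch_tile(tile, tile_dir / f"{index:06d}.tif")   # zero-padded -> sortable

merge_tifs(sorted(tile_dir.glob("*.tif")), final)
shutil.rmtree(tmp_dir)   # clean up the staging area
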
cubexpress/geospatial.py
CHANGED
cubexpress/geotyping.py
CHANGED
@@ -259,13 +259,8 @@ class RequestSet(BaseModel):
     def create_manifests(self) -> pd.DataFrame:
         """
         Exports the raster metadata to a pandas DataFrame.
-
         Returns:
             pd.DataFrame: A DataFrame containing the metadata for all entries.
-
-        Example:
-            >>> df = raster_transform_set.export_df()
-            >>> print(df)
         """
         # Use ProcessPoolExecutor for CPU-bound tasks to convert raster transforms to lon/lat
         with ProcessPoolExecutor(max_workers=None) as executor:
@@ -306,8 +301,6 @@ class RequestSet(BaseModel):
                     "crsCode": meta.raster_transform.crs,
                 },
             },
-            # "cs_cdf": int(meta.id.split("_")[-1]) / 100,
-            # "date": meta.id.split("_")[0],
             "outname": f"{meta.id}.tif",
         }

@@ -428,18 +421,16 @@ class RequestSet(BaseModel):
     def validate_metadata(self) -> RequestSet:
         """
         Validates that all entries have consistent and valid CRS formats.
-
+
         Returns:
             RasterTransformSet: The validated instance.

         Raises:
             ValueError: If any CRS is invalid or inconsistent.
         """
-        # 1. Pre-consistency validation (CRS, IDs, etc.)
         crs_set: Set[str] = {meta.raster_transform.crs for meta in self.requestset}
         validated_crs: Set[str] = set()

-        # Validate CRS formats
         for crs in crs_set:
             if crs not in validated_crs:
                 try:
@@ -448,16 +439,11 @@ class RequestSet(BaseModel):
                 except Exception as e:
                     raise ValueError(f"Invalid CRS format: {crs}") from e

-        # Validate ids, they must be unique
         ids = {meta.id for meta in self.requestset}
         if len(ids) != len(self.requestset):
             raise ValueError("All entries must have unique IDs")

-        # Upgrade same_coordinates to True if all coordinates are the same
-        # 2. We create the dataframe
         self._dataframe = self.create_manifests()
-
-        # 3. We validate the structure of the dataframe
         self._validate_dataframe_schema()

         return self

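For reference, the two checks validate_metadata still performs around the removed comments: every CRS string must parse, and request ids must be unique. The try-body is outside this diff's context, so the pyproj call below is an assumption about how the CRS check works, not the package's confirmed implementation:

from pyproj import CRS

def validate(crs_codes: set[str], ids: list[str]) -> None:
    for crs in crs_codes:
        try:
            CRS.from_user_input(crs)   # assumed parser, e.g. "EPSG:32718"
        except Exception as e:
            raise ValueError(f"Invalid CRS format: {crs}") from e
    if len(set(ids)) != len(ids):
        raise ValueError("All entries must have unique IDs")

validate({"EPSG:4326", "EPSG:32718"}, ["2023-01-01_abc_90", "2023-01-02_abc_75"])
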
cubexpress/request.py
CHANGED
@@ -11,9 +11,9 @@ from cubexpress.conversion import lonlat2rt


 def table_to_requestset(
-
-
-
+    table: pd.DataFrame,
+    mosaic: bool = True
+) -> RequestSet:
     """Return a :class:`RequestSet` built from *df* (cloud_table result).

     Parameters
@@ -35,7 +35,7 @@ def table_to_requestset(
     df = table.copy()

     if df.empty:
-        raise ValueError("
+        raise ValueError("There are no images in the requested period. Please check your dates, ubication or cloud coverage.")

     rt = lonlat2rt(
         lon=df.attrs["lon"],
@@ -43,11 +43,11 @@ def table_to_requestset(
         edge_size=df.attrs["edge_size"],
         scale=df.attrs["scale"],
     )
+
     centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
-    reqs
+    reqs = []

     if mosaic:
-
         grouped = (
             df.groupby('date')
             .agg(
@@ -66,8 +66,7 @@ def table_to_requestset(
         )

         for day, row in grouped.iterrows():
-
-
+
             img_ids = row["id_list"]
             cdf = row["cs_cdf_mean"]
@@ -87,10 +86,8 @@ def table_to_requestset(
             )
         else:
             for img_id in img_ids:
-                # tile = img_id.split("_")[-1][1:]
                 reqs.append(
                     Request(
-                        # id=f"{day}_{centre_hash}_{tile}_{cdf}",
                         id=f"{day}_{centre_hash}_{cdf}",
                         raster_transform=rt,
                         image=f"{df.attrs['collection']}/{img_id}",
@@ -100,16 +97,16 @@ def table_to_requestset(
     else:
         for _, row in df.iterrows():
             img_id = row["id"]
-
+            tile = img_id.split("_")[-1][1:]
             day = row["date"]
             cdf = int(round(row["cs_cdf"], 2) * 100)
             reqs.append(
                 Request(
-                    id=f"{day}_{
+                    id=f"{day}_{tile}_{cdf}",
                     raster_transform=rt,
                     image=f"{df.attrs['collection']}/{img_id}",
                     bands=df.attrs["bands"],
                 )
             )

-    return RequestSet(requestset=reqs)
+    return RequestSet(requestset=reqs)

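In mosaic mode the table collapses to one request per date, carrying that day's scene-id list and mean cloud score, keyed by a precision-5 geohash of the ROI centre. A toy sketch of the grouping — data is invented, the agg names id_list and cs_cdf_mean mirror the diff, and the integer scaling mirrors the non-mosaic branch:

import pandas as pd
import pygeohash as pgh

df = pd.DataFrame({
    "id":     ["20230101T1_A", "20230101T1_B", "20230102T1_C"],
    "date":   ["2023-01-01", "2023-01-01", "2023-01-02"],
    "cs_cdf": [0.90, 0.80, 0.60],
})

grouped = df.groupby("date").agg(
    id_list=("id", list),
    cs_cdf_mean=("cs_cdf", lambda s: int(round(s.mean(), 2) * 100)),
)

centre_hash = pgh.encode(-9.5, -76.5, precision=5)   # lat, lon placeholders
for day, row in grouped.iterrows():
    print(f"{day}_{centre_hash}_{row['cs_cdf_mean']}", row["id_list"])
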
{cubexpress-0.1.11.dist-info → cubexpress-0.1.13.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cubexpress
-Version: 0.1.11
+Version: 0.1.13
 Summary: Efficient processing of cubic Earth-observation (EO) data.
 Home-page: https://github.com/andesdatacube/cubexpress
 Keywords: earth-engine,sentinel-2,geospatial,eo,cube
@@ -31,7 +31,7 @@ Description-Content-Type: text/markdown
 <h1></h1>

 <p align="center">
-  <img src="
+  <img src="https://raw.githubusercontent.com/andesdatacube/cubexpress/refs/heads/main/docs/logo_cubexpress.png" width="39%">
 </p>

 <p align="center">

cubexpress-0.1.13.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+cubexpress/__init__.py,sha256=MfBrTJhA1jUyjrTkNRzdGvF-lPidOtGHTK2qctjY0R8,542
+cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
+cubexpress/cloud_utils.py,sha256=zNnJn7kxHrXC1i_Gxfaa2AG7jBt_hF0jECMO6JuhaaM,7696
+cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
+cubexpress/cube.py,sha256=izDJGpzul7iBym_RanWMffaJE20Qz_Gg7df08lpr3Po,3354
+cubexpress/downloader.py,sha256=rnk-oX51_YFWz1iZuBWEYTDSTV48F780o1aujTsKCwE,3725
+cubexpress/geospatial.py,sha256=jldZ-aFqUEvp1SF8ZJEa-pDHSAs3akzqk43dHai0TKM,3820
+cubexpress/geotyping.py,sha256=s5UwBvvVVmWesSkXjDW6y1Oym5TcC9ftU7WhTUXngFU,16176
+cubexpress/request.py,sha256=eOjZQfJ5GzKzjhpPUt9KCJmOeqWER0c1b0ARH9Vlous,3256
+cubexpress-0.1.13.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
+cubexpress-0.1.13.dist-info/METADATA,sha256=8vSonZCOISUHkNtz-klcRa0wTjGa8gLfUIbPZ9P4ofs,9724
+cubexpress-0.1.13.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+cubexpress-0.1.13.dist-info/RECORD,,

cubexpress-0.1.11.dist-info/RECORD
REMOVED
@@ -1,13 +0,0 @@
-cubexpress/__init__.py,sha256=sKXcYQQPREFhVCHP81lL_5hAurUTm8MX1xVOEOMF-nA,618
-cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
-cubexpress/cloud_utils.py,sha256=BxS3HADLNj6rdFGYUjpcXA1Vvsa87JoL28YEAsu51H4,7482
-cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
-cubexpress/cube.py,sha256=SMN6MvezfeHipFE4v4f23dxWGk9h2t2s2aeeppD0voY,4133
-cubexpress/downloader.py,sha256=XsLDlq2ZHEccc1ET8ghnuOIYtGazVDwXohMSWBemVMw,4067
-cubexpress/geospatial.py,sha256=2DGwl3pyfNEOj8nn9gjc-tiiTXhV2ez9Bghz1I0vERs,3822
-cubexpress/geotyping.py,sha256=Fbnn7EoRvXrtjTRFTS4CPzQbxG4PA6WkfeM4YUp9iKg,16696
-cubexpress/request.py,sha256=PiDqnt3qB9tac4KkZdPIrv5VeRHqobk1u2q1VCCH2lI,3390
-cubexpress-0.1.11.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
-cubexpress-0.1.11.dist-info/METADATA,sha256=pxtqImmO_wIyA9P_0TWaxxps0O-95O6aVLbfEQ9GvBk,9651
-cubexpress-0.1.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cubexpress-0.1.11.dist-info/RECORD,,

{cubexpress-0.1.11.dist-info → cubexpress-0.1.13.dist-info}/LICENSE
File without changes

{cubexpress-0.1.11.dist-info → cubexpress-0.1.13.dist-info}/WHEEL
File without changes