cubexpress 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cubexpress/__init__.py +11 -4
- cubexpress/cache.py +50 -0
- cubexpress/cloud_utils.py +256 -0
- cubexpress/conversion.py +1 -1
- cubexpress/cube.py +84 -0
- cubexpress/downloader.py +95 -0
- cubexpress/geospatial.py +55 -0
- cubexpress/geotyping.py +1 -1
- cubexpress/request.py +77 -0
- {cubexpress-0.1.0.dist-info → cubexpress-0.1.1.dist-info}/METADATA +13 -5
- cubexpress-0.1.1.dist-info/RECORD +13 -0
- cubexpress/download.py +0 -347
- cubexpress-0.1.0.dist-info/RECORD +0 -8
- {cubexpress-0.1.0.dist-info → cubexpress-0.1.1.dist-info}/LICENSE +0 -0
- {cubexpress-0.1.0.dist-info → cubexpress-0.1.1.dist-info}/WHEEL +0 -0
cubexpress/__init__.py
CHANGED
@@ -1,15 +1,22 @@
-from cubexpress.conversion import lonlat2rt
-from cubexpress.download import getcube, getGeoTIFF
+from cubexpress.conversion import lonlat2rt, geo2utm
 from cubexpress.geotyping import RasterTransform, Request, RequestSet
+from cubexpress.cloud_utils import cloud_table
+from cubexpress.cube import get_cube
+from cubexpress.request import table_to_requestset
 
+
+
+# pyproj
 # Export the functions
 __all__ = [
     "lonlat2rt",
     "RasterTransform",
     "Request",
     "RequestSet",
-    "getcube",
-    "getGeoTIFF",
+    "geo2utm",
+    "get_cube",
+    "cloud_table",
+    "table_to_requestset"
 ]
 
 # Dynamic version import
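Taken together, the new modules replace the old `getcube` / `getGeoTIFF` entry points with a three-step pipeline: build a per-day cloud table, convert it into requests, download the cube. A minimal sketch of how the new exports are meant to chain together (the coordinates and dates are placeholders, and an authenticated Earth Engine session is assumed):

import ee
import cubexpress

ee.Initialize()  # assumes `earthengine authenticate` has been run

# 1. Per-day cloud statistics for a square ROI (cached as parquet on disk).
table = cubexpress.cloud_table(
    lon=-76.5, lat=-9.5,            # placeholder centre coordinates
    edge_size=512, scale=10,
    start="2023-01-01", end="2023-12-31",
    cloud_max=7.0,
)

# 2. One mosaic request per sufficiently clear day.
requests = cubexpress.table_to_requestset(table, mosaic=True)

# 3. Download every request as a GeoTIFF, four threads at a time.
cubexpress.get_cube(requests, outfolder="s2_cube", nworks=4)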
cubexpress/cache.py
ADDED
@@ -0,0 +1,50 @@
+"""Simple file-based cache helpers for cloud_table results."""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import os
+import pathlib
+from typing import Final
+
+# Folder where per-location parquet files are stored.
+_CACHE_DIR: Final[pathlib.Path] = pathlib.Path(
+    os.getenv("CUBEXPRESS_CACHE", "~/.cubexpress_cache")
+).expanduser()
+_CACHE_DIR.mkdir(exist_ok=True)
+
+
+def _cache_key(
+    lon: float,
+    lat: float,
+    edge_size: int,
+    scale: int,
+    collection: str,
+) -> pathlib.Path:
+    """Return deterministic parquet path for the given query parameters.
+
+    A 128-bit MD5 hash of the rounded coordinates, edge size, scale and
+    collection is used as file name to avoid overly long paths and ensure
+    uniqueness.
+
+    Parameters
+    ----------
+    lon, lat
+        Centre coordinates in decimal degrees; rounded to 4 dp (≈ 11 m).
+    edge_size
+        Edge length in pixels of the requested square ROI.
+    scale
+        Pixel size in metres.
+    collection
+        EE collection name (e.g. ``"COPERNICUS/S2_HARMONIZED"``).
+
+    Returns
+    -------
+    pathlib.Path
+        Absolute path ending in ``.parquet`` under ``_CACHE_DIR``.
+    """
+    lon_r, lat_r = round(lon, 4), round(lat, 4)
+    raw = json.dumps([lon_r, lat_r, edge_size, scale, collection]).encode()
+    digest = hashlib.md5(raw).hexdigest()  # noqa: S324 – non-cryptographic OK
+    return _CACHE_DIR / f"{digest}.parquet"
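Because the key is built from rounded inputs, nearby calls collapse onto the same cache file. A small sketch of that behaviour (the printed path is illustrative, not a real digest):

# Coordinates that agree to 4 decimal places map to the same parquet file,
# so repeated cloud_table calls for the same ROI hit the cache.
p1 = _cache_key(-76.50001, -9.49999, 512, 10, "COPERNICUS/S2_HARMONIZED")
p2 = _cache_key(-76.50004, -9.50003, 512, 10, "COPERNICUS/S2_HARMONIZED")
assert p1 == p2                       # both round to (-76.5, -9.5)
print(p1)                             # ~/.cubexpress_cache/<md5-hex>.parquet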
cubexpress/cloud_utils.py
ADDED
@@ -0,0 +1,256 @@
+"""Cloud-coverage tables for Sentinel-2 over a square ROI.
+
+Two helpers are exposed:
+
+* :func:`_cloud_table_single_range` – query Earth Engine for one date-range.
+* :func:`cloud_table` – smart wrapper that adds on-disk caching, automatic
+  back-filling, and cloud-percentage filtering.
+
+Both return a ``pandas.DataFrame`` with the columns **day**, **cloudPct** and
+**images** plus useful ``.attrs`` metadata for downstream functions.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import json
+import pathlib
+from typing import List, Optional
+
+import ee
+import pandas as pd
+
+from cubexpress.cache import _cache_key
+from cubexpress.geospatial import _square_roi
+
+
+def _cloud_table_single_range(
+    lon: float,
+    lat: float,
+    edge_size: int,
+    scale: int,
+    start: str,
+    end: str,
+    collection: str = "COPERNICUS/S2_HARMONIZED",
+) -> pd.DataFrame:
+    """Return raw cloud-table rows for a single *start–end* interval.
+
+    Parameters
+    ----------
+    lon, lat
+        Centre coordinates in decimal degrees.
+    edge_size, scale
+        ROI size in pixels (*edge_size*) and pixel resolution in metres
+        (*scale*), fed into :pyfunc:`cubexpress.geospatial._square_roi`.
+    start, end
+        ISO-dates (``YYYY-MM-DD``) delimiting the query.
+    collection
+        Sentinel-2 collection name to query.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Columns: **day** (str), **cloudPct** (float), **images** (str
+        concatenation of asset IDs separated by ``-``). No filtering applied.
+    """
+    roi = _square_roi(lon, lat, edge_size, scale)
+    s2 = ee.ImageCollection(collection)
+
+    if collection in (
+        "COPERNICUS/S2_HARMONIZED",
+        "COPERNICUS/S2_SR_HARMONIZED",
+    ):
+        qa_band = "cs_cdf"
+        csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
+    else:
+        qa_band, csp = None, None
+
+    def _add_props(img):
+        day = ee.Date(img.get("system:time_start")).format("YYYY-MM-dd")
+        imgid = img.get("system:index")
+
+        if qa_band:
+            score = (
+                img.linkCollection(csp, [qa_band])
+                .select([qa_band])
+                .reduceRegion(ee.Reducer.mean(), roi, scale)
+                .get(qa_band)
+            )
+            # If score is null assume completely clear (score=1 → cloudPct=0)
+            score_safe = ee.Algorithms.If(score, score, -1)
+            cloud_pct = (
+                ee.Number(1)
+                .subtract(ee.Number(score_safe))
+                .multiply(10000)
+                .round()
+                .divide(100)
+            )
+        else:
+            cloud_pct = ee.Number(-1)
+
+        return ee.Feature(
+            None,
+            {
+                "day": day,
+                "cloudPct": cloud_pct,
+                "images": imgid,
+            },
+        )
+
+    triples = (
+        s2.filterDate(start, end)
+        .filterBounds(roi)
+        .map(_add_props)
+        .reduceColumns(ee.Reducer.toList(3), ["day", "cloudPct", "images"])
+        .get("list")
+        .getInfo()
+    )
+
+    df = pd.DataFrame(triples, columns=["day", "cloudPct", "images"]).dropna()
+    df["cloudPct"] = df["cloudPct"].astype(float)
+    df["images"] = df["images"].astype(str)
+    return df
+
+
+def cloud_table(
+    lon: float,
+    lat: float,
+    edge_size: int = 2048,
+    scale: int = 10,
+    start: str = "2017-01-01",
+    end: str = "2024-12-31",
+    cloud_max: float = 7.0,
+    bands: Optional[List[str]] = None,
+    collection: str = "COPERNICUS/S2_HARMONIZED",
+    output_path: str | pathlib.Path | None = None,
+    cache: bool = True,
+    verbose: bool = True,
+) -> pd.DataFrame:
+    """Build (and cache) a per-day cloud-table for the requested ROI.
+
+    The function first checks an on-disk parquet cache keyed on location and
+    parameters. If parts of the requested date-range are missing, it fetches
+    only those gaps from Earth Engine, merges, updates the cache and finally
+    filters by *cloud_max*.
+
+    Parameters
+    ----------
+    lon, lat
+        Centre coordinates.
+    edge_size, scale
+        Square size (pixels) and resolution (metres).
+    start, end
+        ISO start/end dates.
+    cloud_max
+        Maximum allowed cloud percentage (0-100). Rows above this threshold are
+        dropped.
+    bands
+        List of spectral bands to embed as metadata. If *None* the full
+        Sentinel-2 set is used.
+    collection
+        Sentinel-2 collection to query.
+    output_path
+        Downstream path hint stored in ``result.attrs``; not used internally.
+    cache
+        Toggle parquet caching.
+    verbose
+        If *True* prints cache info/progress.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Filtered cloud table with ``.attrs`` containing the call parameters.
+    """
+    if bands is None:
+        bands = [
+            "B1",
+            "B2",
+            "B3",
+            "B4",
+            "B5",
+            "B6",
+            "B7",
+            "B8",
+            "B8A",
+            "B9",
+            "B10",
+            "B11",
+            "B12",
+        ]
+
+    cache_file = _cache_key(lon, lat, edge_size, scale, collection)
+
+    # ─── 1. Load cached data if present ────────────────────────────────────
+    if cache and cache_file.exists():
+        if verbose:
+            print("📂 Loading cached table …")
+        df_cached = pd.read_parquet(cache_file)
+        have_idx = pd.to_datetime(df_cached["day"], errors="coerce").dropna()
+
+        cached_start = have_idx.min().date()
+        cached_end = have_idx.max().date()
+
+        if (
+            dt.date.fromisoformat(start) >= cached_start
+            and dt.date.fromisoformat(end) <= cached_end
+        ):
+            if verbose:
+                print("✅ Served entirely from cache.")
+            df_full = df_cached
+        else:
+            # Identify missing segments and fetch only those.
+            df_new_parts = []
+            if dt.date.fromisoformat(start) < cached_start:
+                a1, b1 = start, cached_start.isoformat()
+                df_new_parts.append(
+                    _cloud_table_single_range(
+                        lon, lat, edge_size, scale, a1, b1, collection
+                    )
+                )
+            if dt.date.fromisoformat(end) > cached_end:
+                a2, b2 = cached_end.isoformat(), end
+                df_new_parts.append(
+                    _cloud_table_single_range(
+                        lon, lat, edge_size, scale, a2, b2, collection
+                    )
+                )
+            df_new = pd.concat(df_new_parts, ignore_index=True)
+            df_full = (
+                pd.concat([df_cached, df_new], ignore_index=True)
+                .drop_duplicates("day")
+                .sort_values("day", kind="mergesort")
+            )
+    else:
+        # No cache or caching disabled: fetch full range.
+        if verbose:
+            msg = "Generating table (no cache found)…" if cache else "Generating table…"
+            print("⏳", msg)
+        df_full = _cloud_table_single_range(
+            lon, lat, edge_size, scale, start, end, collection
+        )
+
+    # ─── 2. Save cache ─────────────────────────────────────────────────────
+    if cache:
+        df_full.to_parquet(cache_file, compression="zstd")
+
+    # ─── 3. Filter by cloud cover and requested date window ────────────────
+    result = (
+        df_full.query("@start <= day <= @end")
+        .query("cloudPct < @cloud_max")
+        .reset_index(drop=True)
+    )
+
+    # Attach metadata for downstream helpers
+    result.attrs.update(
+        {
+            "lon": lon,
+            "lat": lat,
+            "edge_size": edge_size,
+            "scale": scale,
+            "bands": bands,
+            "collection": collection,
+            "cloud_max": cloud_max,
+            "output_path": str(output_path) if output_path else "",
+        }
+    )
+    return result
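The per-image cloud percentage computed in `_add_props` is the inverted Cloud Score+ mean, scaled to a percentage and rounded to two decimals. A plain-Python restatement of that arithmetic (the 0.93 score is an invented value; on the server the same maths runs as an ee.Number chain):

def cloud_pct_from_score(score):
    # Mirror of the server-side expression in _add_props.
    score_safe = score if score is not None else -1   # same null sentinel as the EE code
    return round((1 - score_safe) * 10000) / 100

print(cloud_pct_from_score(0.93))   # 7.0   -> kept only when cloud_max > 7.0
print(cloud_pct_from_score(None))   # 200.0 -> always dropped under the default cloud_max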
cubexpress/conversion.py
CHANGED
@@ -19,7 +19,7 @@ def geo2utm(lon: float, lat: float) -> tuple[float, float, str]:
     """
     x, y, zone, _ = utm.from_latlon(lat, lon)
     epsg_code = f"326{zone:02d}" if lat >= 0 else f"327{zone:02d}"
-    return x, y, f"EPSG:{epsg_code}"
+    return float(x), float(y), f"EPSG:{epsg_code}"
 
 
 def lonlat2rt(lon: float, lat: float, edge_size: int, scale: int) -> RasterTransform:
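A quick illustrative check of the changed return value; the test point is arbitrary, and the stated motivation (coercing possible NumPy scalars from `utm.from_latlon` to plain floats so the values stay JSON-serialisable downstream) is an inference rather than something taken from the release notes:

import utm

lon, lat = -76.5, -9.5                       # arbitrary test point
x, y, zone, _ = utm.from_latlon(lat, lon)
epsg = f"EPSG:326{zone:02d}" if lat >= 0 else f"EPSG:327{zone:02d}"
print(float(x), float(y), epsg)              # plain Python floats, EPSG:32718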
cubexpress/cube.py
ADDED
@@ -0,0 +1,84 @@
+"""High-level helpers for tiled GeoTIFF downloads.
+
+The module provides two thread-friendly wrappers:
+
+* **get_geotiff** – download a single manifest, auto-tiling on EE pixel-count
+  errors.
+* **get_cube** – iterate over a ``RequestSet`` (or similar) and build a local
+  raster “cube” in parallel.
+
+The core download/split logic lives in *cubexpress.downloader* and
+*cubexpress.geospatial*; here we merely orchestrate it.
+"""
+
+from __future__ import annotations
+
+import pathlib
+import concurrent.futures
+from typing import Dict, Any
+
+import ee
+from cubexpress.downloader import download_manifest, download_manifests
+from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
+from cubexpress.geotyping import RequestSet
+
+
+def get_geotiff(
+    manifest: Dict[str, Any],
+    full_outname: pathlib.Path | str,
+    nworks: int = 4,
+) -> None:
+    """Download *manifest* to *full_outname*, retrying with tiled requests.
+
+    Parameters
+    ----------
+    manifest
+        Earth Engine download manifest returned by cubexpress.
+    full_outname
+        Final ``.tif`` path (created/overwritten).
+    nworks
+        Maximum worker threads when the image must be split; default **4**.
+    """
+    try:
+        download_manifest(manifest, pathlib.Path(full_outname))
+    except ee.ee_exception.EEException as err:
+        # Handle EE “too many pixels” error by recursive tiling.
+        size = manifest["grid"]["dimensions"]["width"]  # square images assumed
+        cell_w, cell_h, power = calculate_cell_size(str(err), size)
+        tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
+        download_manifests(tiled, max_workers=nworks, full_outname=pathlib.Path(full_outname))
+
+
+def get_cube(
+    requests: RequestSet,
+    outfolder: pathlib.Path | str,
+    nworks: int = 4,
+) -> None:
+    """Download every request in *requests* to *outfolder* using a thread pool.
+
+    Each row in ``requests._dataframe`` must expose ``manifest`` and ``id``.
+    Resulting files are named ``{id}.tif``.
+
+    Parameters
+    ----------
+    requests
+        A ``RequestSet`` or object with an internal ``_dataframe`` attribute.
+    outfolder
+        Folder where the GeoTIFFs will be written (created if absent).
+    nworks
+        Pool size for concurrent downloads; default **4**.
+    """
+    out = pathlib.Path(outfolder)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
+        futures = []
+        for _, row in requests._dataframe.iterrows():
+            outname = out / f"{row.id}.tif"
+            outname.parent.mkdir(parents=True, exist_ok=True)
+            futures.append(pool.submit(get_geotiff, row.manifest, outname, nworks))
+
+        for fut in concurrent.futures.as_completed(futures):
+            try:
+                fut.result()
+            except Exception as exc:  # noqa: BLE001 – log and keep going
+                print(f"Download error: {exc}")
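One behavioural detail that follows from the code above: a small request produces the single `.tif` asked for, while an oversized request falls back to `download_manifests`, which writes numbered tiles into a sibling folder named after the file stem. A hedged usage sketch (the manifest variable and file names are hypothetical):

# `manifest` is assumed to be a cubexpress download manifest (see downloader.py below).
get_geotiff(manifest, "cube/2023-07-14_mosaic.tif", nworks=4)
# fits the EE pixel limit    -> cube/2023-07-14_mosaic.tif
# exceeds the pixel limit    -> cube/2023-07-14_mosaic/000000.tif, 000001.tif, ...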
cubexpress/downloader.py
ADDED
@@ -0,0 +1,95 @@
+"""Low-level download helpers for Earth Engine manifests.
+
+Only two public callables are exposed:
+
+* :func:`download_manifest` – fetch a single manifest and write one GeoTIFF.
+* :func:`download_manifests` – convenience wrapper to parallel-download a list
+  of manifests with a thread pool.
+
+Both functions are fully I/O bound; no return value is expected.
+"""
+
+from __future__ import annotations
+
+import json
+import pathlib
+import concurrent.futures
+from copy import deepcopy
+from typing import Any, Dict, List
+
+import ee
+import rasterio as rio
+from rasterio.io import MemoryFile
+
+
+def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None:
+    """Download *ulist* and save it as *full_outname*.
+
+    The manifest must include either an ``assetId`` or an ``expression``
+    (serialized EE image). RasterIO is used to write a tiled, compressed
+    GeoTIFF; the function is silent apart from the final ``print``.
+    """
+    if "assetId" in ulist:
+        images_bytes = ee.data.getPixels(ulist)
+    elif "expression" in ulist:
+        ee_image = ee.deserializer.decode(json.loads(ulist["expression"]))
+        ulist_deep = deepcopy(ulist)
+        ulist_deep["expression"] = ee_image
+        images_bytes = ee.data.computePixels(ulist_deep)
+    else:  # pragma: no cover
+        raise ValueError("Manifest does not contain 'assetId' or 'expression'")
+
+    with MemoryFile(images_bytes) as memfile:
+        with memfile.open() as src:
+            profile = src.profile
+            profile.update(
+                {
+                    "driver": "Gtiff",
+                    "tiled": "yes",
+                    "interleave": "band",
+                    "blockxsize": 256,
+                    "blockysize": 256,
+                    "compress": "ZSTD",
+                    "predictor": 2,
+                    "num_threads": 20,
+                    "nodata": 65535,
+                    "dtype": "uint16",
+                    "count": 13,
+                    "lztd_level": 13,
+                    "copy_src_overviews": True,
+                    "overviews": "AUTO",
+                }
+            )
+            all_bands = src.read()
+
+    with rio.open(full_outname, "w", **profile) as dst:
+        dst.write(all_bands)
+
+    print(f"{full_outname} downloaded successfully.")  # noqa: T201
+
+
+def download_manifests(
+    manifests: List[Dict[str, Any]],
+    max_workers: int,
+    full_outname: pathlib.Path,
+) -> None:
+    """Download every manifest in *manifests* concurrently.
+
+    Each output file is saved in the folder
+    ``full_outname.parent/full_outname.stem`` with names ``000000.tif``,
+    ``000001.tif`` … according to the list order.
+    """
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = []
+
+        for index, umanifest in enumerate(manifests):
+            folder = full_outname.parent / full_outname.stem
+            folder.mkdir(parents=True, exist_ok=True)
+            outname = folder / f"{index:06d}.tif"
+            futures.append(executor.submit(download_manifest, umanifest, outname))
+
+        for fut in concurrent.futures.as_completed(futures):
+            try:
+                fut.result()
+            except Exception as exc:  # noqa: BLE001
+                print(f"Error en una de las descargas: {exc}")  # noqa: T201
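For context, the shape of a manifest that `download_manifest` accepts; this example is adapted from the docstrings of the removed `download.py` further down, so the asset ID and grid values are only illustrative:

manifest = {
    "assetId": "COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN",
    "fileFormat": "GEO_TIFF",
    "bandIds": ["B4", "B3", "B2"],
    "grid": {
        "dimensions": {"width": 512, "height": 512},
        "affineTransform": {
            "scaleX": 10, "shearX": 0, "translateX": 725260.108545126,
            "scaleY": -10, "shearY": 0, "translateY": 4701550.38712196,
        },
        "crsCode": "EPSG:32618",
    },
}
# download_manifest(manifest, pathlib.Path("sentinel_image.tif"))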
cubexpress/geospatial.py
ADDED
@@ -0,0 +1,55 @@
+import ee
+import re
+from copy import deepcopy
+from typing import Dict
+
+
+def quadsplit_manifest(manifest: Dict, cell_width: int, cell_height: int, power: int) -> list[Dict]:
+    manifest_copy = deepcopy(manifest)
+
+    manifest_copy["grid"]["dimensions"]["width"] = cell_width
+    manifest_copy["grid"]["dimensions"]["height"] = cell_height
+    x = manifest_copy["grid"]["affineTransform"]["translateX"]
+    y = manifest_copy["grid"]["affineTransform"]["translateY"]
+    scale_x = manifest_copy["grid"]["affineTransform"]["scaleX"]
+    scale_y = manifest_copy["grid"]["affineTransform"]["scaleY"]
+
+    manifests = []
+
+    for columny in range(2**power):
+        for rowx in range(2**power):
+            new_x = x + (rowx * cell_width) * scale_x
+            new_y = y + (columny * cell_height) * scale_y
+            new_manifest = deepcopy(manifest_copy)
+            new_manifest["grid"]["affineTransform"]["translateX"] = new_x
+            new_manifest["grid"]["affineTransform"]["translateY"] = new_y
+            manifests.append(new_manifest)
+
+    return manifests
+
+
+
+def calculate_cell_size(ee_error_message: str, size: int) -> tuple[int, int]:
+    match = re.findall(r'\d+', ee_error_message)
+    image_pixel = int(match[0])
+    max_pixel = int(match[1])
+
+    images = image_pixel / max_pixel
+    power = 0
+
+    while images > 1:
+        power += 1
+        images = image_pixel / (max_pixel * 4 ** power)
+
+    cell_width = size // 2 ** power
+    cell_height = size // 2 ** power
+
+    return cell_width, cell_height, power
+
+
+
+def _square_roi(lon: float, lat: float, edge_size: int, scale: int) -> ee.Geometry:
+    """Return a square `ee.Geometry` centred on (*lon*, *lat*)."""
+    half = edge_size * scale / 2
+    point = ee.Geometry.Point([lon, lat])
+    return point.buffer(half).bounds()
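A worked example of the tiling arithmetic in `calculate_cell_size`, assuming the Earth Engine error message reports the requested size first and the allowed maximum second (that ordering is what the `re.findall` call relies on; the numbers below are invented):

# e.g. "Total request size (16777216 ...) must be less than or equal to 1048576 ..."
image_pixel, max_pixel, size = 16_777_216, 1_048_576, 4096

images = image_pixel / max_pixel          # 16.0 -> too large, must split
power = 0
while images > 1:
    power += 1
    images = image_pixel / (max_pixel * 4 ** power)
# power == 2: each of the 4**2 = 16 tiles covers 1/16 of the original area

cell_width = size // 2 ** power           # 4096 // 4 = 1024 px per tile edge
print(power, cell_width, (2 ** power) ** 2)   # 2 1024 16  (quadsplit yields 16 manifests)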
cubexpress/geotyping.py
CHANGED
cubexpress/request.py
ADDED
@@ -0,0 +1,77 @@
+"""Convert cloud_table output into a RequestSet."""
+
+from __future__ import annotations
+
+import ee
+import pandas as pd
+import pygeohash as pgh
+from typing import List
+
+from cubexpress.geotyping import Request, RequestSet
+from cubexpress.conversion import lonlat2rt
+
+
+def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
+    """Return a :class:`RequestSet` built from *df* (cloud_table result).
+
+    Parameters
+    ----------
+    df
+        DataFrame with *day* and *images* columns plus attrs created by
+        :pyfunc:`cubexpress.cloud_table`.
+    mosaic
+        If ``True`` a single mosaic per day is requested; otherwise each
+        individual asset becomes its own request.
+
+    Raises
+    ------
+    ValueError
+        If *df* is empty after filtering.
+
+    """
+    if df.empty:
+        raise ValueError("cloud_table returned no rows; nothing to request.")
+
+    rt = lonlat2rt(
+        lon=df.attrs["lon"],
+        lat=df.attrs["lat"],
+        edge_size=df.attrs["edge_size"],
+        scale=df.attrs["scale"],
+    )
+    centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
+    reqs: List[Request] = []
+
+    if mosaic:
+        # group all asset IDs per day
+        grouped = (
+            df.assign(img=lambda x: x.images.str.split("-"))
+            .explode("img")
+            .groupby("day")["img"]
+            .apply(list)
+        )
+
+        for day, img_ids in grouped.items():
+            ee_img = ee.ImageCollection(
+                [ee.Image(f"{df.attrs['collection']}/{img}") for img in img_ids]
+            ).mosaic()
+            reqs.append(
+                Request(
+                    id=f"{day}_{centre_hash}_mosaic",
+                    raster_transform=rt,
+                    image=ee_img,
+                    bands=df.attrs["bands"],
+                )
+            )
+    else:  # one request per asset
+        for _, row in df.iterrows():
+            for img_id in row["images"].split("-"):
+                reqs.append(
+                    Request(
+                        id=f"{row['day']}_{centre_hash}_{img_id}",
+                        raster_transform=rt,
+                        image=f"{df.attrs['collection']}/{img_id}",
+                        bands=df.attrs["bands"],
+                    )
+                )
+
+    return RequestSet(requestset=reqs)
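The request IDs produced above encode the day, a 5-character geohash of the ROI centre, and either the literal `mosaic` or the individual asset ID; the same string later becomes the output file name in `get_cube` (`{id}.tif`). A short sketch of the two naming schemes (the asset ID is made up):

import pygeohash as pgh

centre_hash = pgh.encode(-9.5, -76.5, precision=5)        # (lat, lon) of the ROI centre
day, img_id = "2023-07-14", "20230714T153621_20230714T153616_T18LVM"   # invented asset

print(f"{day}_{centre_hash}_mosaic")     # id when mosaic=True
print(f"{day}_{centre_hash}_{img_id}")   # id when mosaic=False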
{cubexpress-0.1.0.dist-info → cubexpress-0.1.1.dist-info}/METADATA
CHANGED
@@ -1,9 +1,10 @@
 Metadata-Version: 2.1
 Name: cubexpress
-Version: 0.1.0
-Summary:
-Home-page: https://github.com/andesdatacube/cubexpress
+Version: 0.1.1
+Summary: Efficient processing of cubic Earth-observation (EO) data.
+Home-page: https://github.com/andesdatacube/cubexpress
 License: MIT
+Keywords: earth-engine,sentinel-2,geospatial,eo,cube
 Author: Julio Contreras
 Author-email: contrerasnetk@gmail.com
 Requires-Python: >=3.9,<4.0
@@ -13,11 +14,18 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Scientific/Engineering :: GIS
+Provides-Extra: full
+Requires-Dist: earthengine-api (>=0.1.392) ; extra == "full"
 Requires-Dist: numpy (>=1.25.2)
 Requires-Dist: pandas (>=2.0.3)
+Requires-Dist: pyarrow (>=14.0.0) ; extra == "full"
+Requires-Dist: pygeohash (>=1.2.0,<2.0.0)
+Requires-Dist: rasterio (>=1.3.9) ; extra == "full"
 Requires-Dist: utm (>=0.8.0,<0.9.0)
-Project-URL: Documentation, https://andesdatacube.github.io/cubexpress
-Project-URL: Repository, https://github.com/andesdatacube/cubexpress
+Project-URL: Documentation, https://andesdatacube.github.io/cubexpress
+Project-URL: Repository, https://github.com/andesdatacube/cubexpress
 Description-Content-Type: text/markdown
 
 <h1></h1>
cubexpress-0.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+cubexpress/__init__.py,sha256=ybNczt27OVUkT8WE8v0-A9hByKtfNsNysg-V8CnZqxE,564
+cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
+cubexpress/cloud_utils.py,sha256=O4qcl8kr0_Yv0giZ-h4uzf791d1_z9HZy1Br8N867iA,8102
+cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
+cubexpress/cube.py,sha256=1GPVAt5Q0vGqA3QJ4gixAevlosV4JHNKMzN1PirhawI,2911
+cubexpress/downloader.py,sha256=u0u1LG2DOIaEvCPDIAaJDLH5_od52D1OPCbcpCicRzY,3320
+cubexpress/geospatial.py,sha256=ZbsPIgsYQFnNFXUuQ136rJsL4b2Bf91o0Vsswby2dFc,1812
+cubexpress/geotyping.py,sha256=6hjzjZhg6jRYRhLMQ_IiBygnShWlRCtpIbf6rRaQQ7s,17163
+cubexpress/request.py,sha256=cRm0J6Um8wCkbMDYBv9eCiqv32hLH28EH4eHLLDsJ-c,2333
+cubexpress-0.1.1.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
+cubexpress-0.1.1.dist-info/METADATA,sha256=3DhrMNjKWIfImjQgfCVRfade1JXcX2acDJX4iPtwR4U,9692
+cubexpress-0.1.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+cubexpress-0.1.1.dist-info/RECORD,,
cubexpress/download.py
DELETED
@@ -1,347 +0,0 @@
-import concurrent.futures
-import json
-import pathlib
-from concurrent.futures import ThreadPoolExecutor
-from copy import deepcopy
-from typing import Optional
-
-import ee
-import numpy as np
-import pandas as pd
-
-from cubexpress.geotyping import RequestSet
-
-
-def check_not_found_error(error_message: str) -> bool:
-    """
-    Checks if the error message indicates that the image was not found.
-
-    Args:
-        error_message (str): The error message to check.
-
-    Returns:
-        bool: True if the error message indicates "not found", False otherwise.
-
-    Example:
-        >>> check_not_found_error("Total request size must be less than or equal to...")
-        True
-    """
-    return (
-        "Total request size" in error_message
-        and "must be less than or equal to" in error_message
-    )
-
-
-def quadsplit_manifest(manifest: dict) -> list[dict]:
-    """
-    Splits a manifest into four smaller ones by dividing the grid dimensions.
-
-    Args:
-        manifest (dict): The original manifest to split.
-
-    Returns:
-        List[dict]: A list of four smaller manifests with updated grid transformations.
-
-    Example:
-        >>> manifest = {'grid': {'dimensions': {'width': 100, 'height': 100}, 'affineTransform': {'scaleX': 0.1, 'scaleY': 0.1, 'translateX': 0, 'translateY': 0}}}
-        >>> quadsplit_manifest(manifest)
-        [{'grid': {'dimensions': {'width': 50, 'height': 50}, 'affineTransform': {'scaleX': 0.1, 'scaleY': 0.1, 'translateX': 0, 'translateY': 0}}}, {'grid': {'dimensions': {'width': 50, 'height': 50}, 'affineTransform': {'scaleX': 0.1, 'scaleY': 0.1, 'translateX': 5.0, 'translateY': 0}}}, ...]
-    """
-    manifest_copy = deepcopy(manifest)
-    new_width = manifest["grid"]["dimensions"]["width"] // 2
-    new_height = manifest["grid"]["dimensions"]["height"] // 2
-    manifest_copy["grid"]["dimensions"]["width"] = new_width
-    manifest_copy["grid"]["dimensions"]["height"] = new_height
-
-    manifests = []
-    for idx in range(4):
-        new_manifest = deepcopy(manifest_copy)
-        res_x = manifest["grid"]["affineTransform"]["scaleX"]
-        res_y = manifest["grid"]["affineTransform"]["scaleY"]
-
-        add_x, add_y = (0, 0)
-        if idx == 1:
-            add_x = new_width * res_x
-        elif idx == 2:
-            add_y = new_height * res_y
-        elif idx == 3:
-            add_x = new_width * res_x
-            add_y = new_height * res_y
-
-        new_manifest["grid"]["affineTransform"]["translateX"] += add_x
-        new_manifest["grid"]["affineTransform"]["translateY"] += add_y
-
-        manifests.append(new_manifest)
-
-    return manifests
-
-
-def getGeoTIFFbatch(
-    manifest_dict: dict,
-    full_outname: pathlib.Path,
-    max_deep_level: Optional[int] = 5,
-    method: Optional[str] = "getPixels",
-) -> Optional[np.ndarray]:
-    """
-    Downloads a GeoTIFF image from Google Earth Engine using either the `getPixels` or `computePixels` method.
-    If the requested area exceeds the size limit, the image is recursively split into smaller tiles until the
-    download succeeds or the maximum recursion depth is reached.
-
-    Args:
-        manifest_dict (dict): A dictionary containing image metadata, including grid dimensions, affine transformations,
-            and either an `assetId` or `expression` for the image source.
-        full_outname (pathlib.Path): The full path where the downloaded GeoTIFF file will be saved.
-        max_deep_level (Optional[int]): Maximum recursion depth for splitting large requests. Defaults to 5.
-        method (Optional[str]): Method for retrieving image data. Can be 'getPixels' for asset-based requests or
-            'computePixels' for expressions. Defaults to 'getPixels'.
-
-    Returns:
-        Optional[pathlib.Path]: The path to the downloaded GeoTIFF file. Returns `None` if the download fails.
-
-    Raises:
-        ValueError: If the method is not 'getPixels' or 'computePixels', or if the image cannot be found.
-
-    Example:
-        >>> import ee
-        >>> import pathlib
-        >>> ee.Initialize()
-        >>> manifest_dict = {
-        ...     "assetId": "COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN",
-        ...     "fileFormat": "GEO_TIFF",
-        ...     "bandIds": ["B4", "B3", "B2"],
-        ...     "grid": {
-        ...         "dimensions": {
-        ...             "width": 512,
-        ...             "height": 512
-        ...         },
-        ...         "affineTransform": {
-        ...             "scaleX": 10,
-        ...             "shearX": 0,
-        ...             "translateX": 725260.108545126,
-        ...             "scaleY": -10,
-        ...             "shearY": 0,
-        ...             "translateY": 4701550.38712196
-        ...         },
-        ...         "crsCode": "EPSG:32618"
-        ...     }
-        ... }
-
-        >>> getGeoTIFFbatch(manifest_dict pathlib.Path('output/sentinel_image.tif'))
-        PosixPath('output/sentinel_image.tif')
-    """
-
-    # Check if the maximum recursion depth has been reached
-    if max_deep_level == 0:
-        raise ValueError("Max recursion depth reached.")
-
-    try:
-        # Get the image bytes
-        if method == "getPixels":
-            image_bytes: bytes = ee.data.getPixels(manifest_dict)
-        elif method == "computePixels":
-            image_bytes: bytes = ee.data.computePixels(manifest_dict)
-        else:
-            raise ValueError("Method must be either 'getPixels' or 'computePixels'")
-
-        # Write the image bytes to a file
-        with open(full_outname, "wb") as src:
-            src.write(image_bytes)
-    except Exception as e:
-        # TODO: This is a workaround when the image is not found, as it is a message from the server
-        # it is not possible to check the type of the exception
-        if not check_not_found_error(str(e)):
-            raise ValueError(
-                f"Error downloading the GeoTIFF file from Earth Engine: {e}"
-            )
-
-        # Create the output directory if it doesn't exist
-        child_folder: pathlib.Path = full_outname.parent / full_outname.stem
-        pathlib.Path(child_folder).mkdir(parents=True, exist_ok=True)
-
-        # Split the manifest into four smaller manifests
-        manifest_dicts = quadsplit_manifest(manifest_dict)
-
-        for idx, manifest_dict_batch in enumerate(manifest_dicts):
-            # Recursively download the image
-            getGeoTIFFbatch(
-                full_outname=child_folder / ("%s__%02d.tif" % (full_outname.stem, idx)),
-                manifest_dict=manifest_dict_batch,
-                max_deep_level=max_deep_level - 1,
-                method=method,
-            )
-
-    return full_outname
-
-
-def getGeoTIFF(
-    manifest_dict: dict, full_outname: pathlib.Path, max_deep_level: Optional[int] = 5
-) -> Optional[np.ndarray]:
-    """
-    Retrieves an image from Earth Engine using the appropriate method based on the manifest type.
-
-    This function downloads a GeoTIFF image from Google Earth Engine (GEE). Depending on the content of
-    the provided manifest (`manifest_dict`), the function will either use the `getPixels` method (for
-    asset-based requests) or the `computePixels` method (for expressions). If the requested area exceeds
-    the size limit, the image will be recursively split into smaller tiles until the download succeeds or
-    the maximum recursion depth is reached.
-
-    Args:
-        manifest_dict (dict): A dictionary containing the image metadata. This should include either:
-            - `assetId`: The identifier of a GEE asset (e.g., satellite imagery).
-            - `expression`: A serialized string representing a GEE image expression (e.g., an image computation).
-            Additionally, the manifest should include grid information such as the image dimensions and affine transformations.
-
-        full_outname (pathlib.Path): The full path where the downloaded GeoTIFF file will be saved.
-
-        max_deep_level (Optional[int]): The maximum recursion depth for splitting large requests into smaller tiles if needed.
-            Defaults to 5.
-
-    Returns:
-        Optional[np.ndarray]: The downloaded image as a `numpy` array, or `None` if the download fails. It will
-            also return the full file path to the saved GeoTIFF image.
-
-    Raises:
-        ValueError: If the manifest does not contain either an `assetId` or `expression`, or if there is an error during download.
-
-    Example 1: Downloading an image using an `assetId`:
-        >>> import ee
-        >>> import pathlib
-        >>> ee.Initialize()
-        >>> manifest_dict = {
-        ...     "assetId": "COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN",
-        ...     "fileFormat": "GEO_TIFF",
-        ...     "bandIds": ["B4", "B3", "B2"],
-        ...     "grid": {
-        ...         "dimensions": {"width": 512, "height": 512},
-        ...         "affineTransform": {
-        ...             "scaleX": 10,
-        ...             "shearX": 0,
-        ...             "translateX": 725260.108545126,
-        ...             "scaleY": -10,
-        ...             "shearY": 0,
-        ...             "translateY": 4701550.38712196
-        ...         },
-        ...         "crsCode": "EPSG:32618"
-        ...     }
-        ... }
-        >>> getGeoTIFF(manifest_dict, pathlib.Path('output/sentinel_image.tif'))
-        PosixPath('output/sentinel_image.tif')
-
-    Example 2: Downloading an image using an `expression`:
-        >>> image = ee.Image("COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN") \
-        ...     .divide(10_000) \
-        ...     .select(["B4", "B3", "B2"])
-        >>> expression = image.serialize()
-        >>> manifest_dict = {
-        ...     "expression": expression,
-        ...     "fileFormat": "GEO_TIFF",
-        ...     "grid": {
-        ...         "dimensions": {"width": 512, "height": 512},
-        ...         "affineTransform": {
-        ...             "scaleX": 10,
-        ...             "shearX": 0,
-        ...             "translateX": 725260.108545126,
-        ...             "scaleY": -10,
-        ...             "shearY": 0,
-        ...             "translateY": 4701550.38712196
-        ...         },
-        ...         "crsCode": "EPSG:32618"
-        ...     }
-        ... }
-        >>> getGeoTIFF(manifest_dict, pathlib.Path('output/expression_image.tif'))
-        PosixPath('output/expression_image.tif')
-    """
-    if "assetId" in manifest_dict:
-        return getGeoTIFFbatch(
-            manifest_dict=manifest_dict,
-            full_outname=full_outname,
-            max_deep_level=max_deep_level,
-            method="getPixels",
-        )
-    elif "expression" in manifest_dict:
-        if isinstance(
-            manifest_dict["expression"], str
-        ):  # Decode only if the expression is still a string.
-            # From a string to a ee.Image object
-            manifest_dict["expression"] = ee.deserializer.decode(
-                json.loads(manifest_dict["expression"])
-            )
-
-        return getGeoTIFFbatch(
-            manifest_dict=manifest_dict,
-            full_outname=full_outname,
-            max_deep_level=max_deep_level,
-            method="computePixels",
-        )
-    else:
-        raise ValueError("Manifest does not contain 'assetId' or 'expression'")
-
-
-def getcube(
-    request: RequestSet,
-    output_path: str | pathlib.Path,
-    nworkers: Optional[int] = None,
-    max_deep_level: Optional[int] = 5,
-) -> list[pathlib.Path]:
-    """
-    Downloads multiple GeoTIFF images in parallel from Google Earth Engine (GEE) based on the provided request set.
-
-    Args:
-        request (RequestSet): A collection of image requests containing metadata and processing parameters.
-        output_path (Union[str, pathlib.Path]): Directory where the downloaded images will be saved.
-        nworkers (Optional[int], default=None): Number of parallel threads. If None, runs sequentially.
-        max_deep_level (Optional[int], default=5): Maximum recursion depth for image subdivision if exceeding GEE limits.
-
-    Returns:
-        List[pathlib.Path]: List of paths to the downloaded GeoTIFF files.
-
-    Example:
-        >>> import ee, cubexpress
-        >>> ee.Initialize()
-        >>> point = ee.Geometry.Point([-97.59, 33.37])
-        >>> collection = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") \
-        ...     .filterBounds(point) \
-        ...     .filterDate('2024-01-01', '2024-01-31')
-        >>> image_ids = collection.aggregate_array('system:id').getInfo()
-        >>> geotransform = cubexpress.lonlat2rt(lon=-97.59, lat=33.37, edge_size=128, scale=10)
-        >>> requests = [cubexpress.Request(id=f"s2_{i}", raster_transform=geotransform, bands=["B4", "B3", "B2"], image=ee.Image(img_id)) for i, img_id in enumerate(image_ids)]
-        >>> cube_requests = cubexpress.RequestSet(requestset=requests)
-        >>> cubexpress.getcube(request=cube_requests, nworkers=4, output_path="output", max_deep_level=5)
-        [PosixPath('output/s2_0.tif'), PosixPath('output/s2_1.tif'), ...]
-    """
-
-    # Check that _dataframe exists and is not empty
-    if request._dataframe is None or request._dataframe.empty:
-        raise ValueError(
-            "The request's _dataframe is None or empty. "
-            "There are no valid requests to process."
-        )
-
-    # **Revalidate** the DataFrame structure, in case the user manipulated it.
-    request._validate_dataframe_schema()
-
-    # Get the table
-    table: pd.DataFrame = request._dataframe
-
-    # Create the output directory if it doesn't exist
-    output_path = pathlib.Path(output_path)
-    output_path.mkdir(parents=True, exist_ok=True)
-
-    results = []
-    with ThreadPoolExecutor(max_workers=nworkers) as executor:
-        futures = {
-            executor.submit(
-                getGeoTIFF, row.manifest, output_path / row.outname, max_deep_level
-            ): row
-            for _, row in table.iterrows()
-        }
-        for future in concurrent.futures.as_completed(futures):
-            try:
-                result = future.result()
-                if result:
-                    results.append(result)
-            except Exception as e:
-                # TODO add this into the log
-                print(f"Error processing {futures[future].outname}: {e}")
-
-    return results
cubexpress-0.1.0.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-cubexpress/__init__.py,sha256=1CF6kINn70mfS5HNzYyTf4UsOUPG0qzeetoJSDk0ALw,418
-cubexpress/conversion.py,sha256=h77re8AtdVV_Jy3ugZeQ-e2I8DHSKoghiq70MXkzBaQ,2506
-cubexpress/download.py,sha256=DX5DKPdKiuv1gHxs-5Q5ScZ06nvE-Pi1YGLSzQc2jrs,14315
-cubexpress/geotyping.py,sha256=5JgsOfRfwQf-iBh902wKQ1AxEKw1HgFL2brzwkxO0Pg,17152
-cubexpress-0.1.0.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
-cubexpress-0.1.0.dist-info/METADATA,sha256=XfBIfpFP1quHSNr60Dn6R8EEpdq02XJWCepwhl7j7U0,9327
-cubexpress-0.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cubexpress-0.1.0.dist-info/RECORD,,
{cubexpress-0.1.0.dist-info → cubexpress-0.1.1.dist-info}/LICENSE
File without changes
{cubexpress-0.1.0.dist-info → cubexpress-0.1.1.dist-info}/WHEEL
File without changes