cubexpress 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cubexpress might be problematic. Click here for more details.
- cubexpress/__init__.py +18 -0
- cubexpress/conversion.py +73 -0
- cubexpress/download.py +347 -0
- cubexpress/geotyping.py +488 -0
- cubexpress-0.1.0.dist-info/LICENSE +22 -0
- cubexpress-0.1.0.dist-info/METADATA +305 -0
- cubexpress-0.1.0.dist-info/RECORD +8 -0
- cubexpress-0.1.0.dist-info/WHEEL +4 -0
cubexpress/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Public API for the :mod:`cubexpress` package.

Re-exports the user-facing helpers from the submodules and resolves the
package version from the installed distribution metadata.
"""

# Stdlib import moved to the top with the other imports (PEP 8); it was
# previously placed after ``__all__`` at the bottom of the file.
import importlib.metadata

from cubexpress.conversion import lonlat2rt
from cubexpress.download import getcube, getGeoTIFF
from cubexpress.geotyping import RasterTransform, Request, RequestSet

# Export the functions
__all__ = [
    "lonlat2rt",
    "RasterTransform",
    "Request",
    "RequestSet",
    "getcube",
    "getGeoTIFF",
]

# Dynamic version import: single source of truth is the installed wheel's
# metadata, so the version never drifts from pyproject/setup configuration.
__version__ = importlib.metadata.version("cubexpress")
|
cubexpress/conversion.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import utm
|
|
2
|
+
|
|
3
|
+
from cubexpress.geotyping import RasterTransform
|
|
4
|
+
|
|
5
|
+
# Lightweight alias used when building geotransform mappings in this module.
# NOTE(review): cubexpress.geotyping also defines a ``GeotransformDict``
# TypedDict with the same six keys; this plain ``dict[str, float]`` alias
# shadows that name here — consider unifying the two. TODO confirm intent.
GeotransformDict = dict[str, float]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def geo2utm(lon: float, lat: float) -> tuple[float, float, str]:
    """
    Project a WGS84 point onto its UTM zone and report the matching EPSG code.

    Args:
        lon (float): Longitude in decimal degrees.
        lat (float): Latitude in decimal degrees.

    Returns:
        Tuple[float, float, str]: Easting, northing, and the EPSG code
        ("EPSG:326xx" for the northern hemisphere, "EPSG:327xx" for the
        southern one, where xx is the zero-padded zone number).
    """
    easting, northing, zone_number, _zone_letter = utm.from_latlon(lat, lon)
    # UTM EPSG codes: 326xx north of the equator, 327xx south of it.
    hemisphere_prefix = "326" if lat >= 0 else "327"
    return easting, northing, f"EPSG:{hemisphere_prefix}{zone_number:02d}"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def lonlat2rt(lon: float, lat: float, edge_size: int, scale: int) -> RasterTransform:
    """
    Build a :class:`RasterTransform` for a square raster centered on (lon, lat).

    The point is first projected to its UTM zone via :func:`geo2utm`; the
    raster origin is then placed half an extent west and north of that
    center, with a negative Y pixel size because geospatial rasters put the
    origin at the top-left corner (Y grows downward).

    Args:
        lon (float): The longitude coordinate.
        lat (float): The latitude coordinate.
        edge_size (int): Width and height of the output raster in pixels.
        scale (int): Spatial resolution in meters per pixel.

    Returns:
        RasterTransform: Pydantic model holding the ``crs`` ("EPSG:XYZ"),
        the affine ``geotransform`` dictionary, and ``width``/``height``.

    Example:
        >>> import cubexpress
        >>> rt = cubexpress.lonlat2rt(
        ...     lon=-76.0,
        ...     lat=40.0,
        ...     edge_size=512,
        ...     scale=30
        ... )
        >>> print(rt)
    """
    center_x, center_y, crs = geo2utm(lon, lat)
    half_extent = (edge_size * scale) / 2

    # Top-left origin: shift west of center along X, north of center along Y.
    geotransform = GeotransformDict(
        scaleX=scale,
        shearX=0,
        translateX=center_x - half_extent,
        scaleY=-scale,  # Y-axis is inverted in geospatial images
        shearY=0,
        translateY=center_y + half_extent,
    )

    return RasterTransform(
        crs=crs, geotransform=geotransform, width=edge_size, height=edge_size
    )
|
cubexpress/download.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
import json
|
|
3
|
+
import pathlib
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import ee
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from cubexpress.geotyping import RequestSet
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def check_not_found_error(error_message: str) -> bool:
    """
    Tell whether an Earth Engine error message is the request-size-limit error.

    Args:
        error_message (str): The error message to check.

    Returns:
        bool: True if the message is the size-limit error, False otherwise.

    Example:
        >>> check_not_found_error("Total request size must be less than or equal to...")
        True
    """
    # Both fragments must appear; either one alone is a different error.
    needles = ("Total request size", "must be less than or equal to")
    return all(needle in error_message for needle in needles)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def quadsplit_manifest(manifest: dict) -> list[dict]:
    """
    Split a manifest into four quadrant manifests of half the width and height.

    The four tiles cover top-left, top-right, bottom-left and bottom-right of
    the original grid; translate offsets are derived from the half dimensions
    and pixel scales. The input manifest is never mutated.

    Args:
        manifest (dict): The original manifest to split.

    Returns:
        List[dict]: Four manifests with halved dimensions and shifted origins.

    Example:
        >>> manifest = {'grid': {'dimensions': {'width': 100, 'height': 100}, 'affineTransform': {'scaleX': 0.1, 'scaleY': 0.1, 'translateX': 0, 'translateY': 0}}}
        >>> quadsplit_manifest(manifest)
        [{'grid': {'dimensions': {'width': 50, 'height': 50}, 'affineTransform': {'scaleX': 0.1, 'scaleY': 0.1, 'translateX': 0, 'translateY': 0}}}, {'grid': {'dimensions': {'width': 50, 'height': 50}, 'affineTransform': {'scaleX': 0.1, 'scaleY': 0.1, 'translateX': 5.0, 'translateY': 0}}}, ...]
    """
    # Template tile: same manifest with halved grid dimensions.
    # NOTE(review): integer division drops one pixel row/column on odd
    # dimensions — presumably acceptable for the retry path; confirm upstream.
    template = deepcopy(manifest)
    half_width = manifest["grid"]["dimensions"]["width"] // 2
    half_height = manifest["grid"]["dimensions"]["height"] // 2
    template["grid"]["dimensions"]["width"] = half_width
    template["grid"]["dimensions"]["height"] = half_height

    res_x = manifest["grid"]["affineTransform"]["scaleX"]
    res_y = manifest["grid"]["affineTransform"]["scaleY"]

    # Origin shifts for the four quadrants, in tile order 0..3.
    offsets = [
        (0, 0),
        (half_width * res_x, 0),
        (0, half_height * res_y),
        (half_width * res_x, half_height * res_y),
    ]

    tiles = []
    for shift_x, shift_y in offsets:
        tile = deepcopy(template)
        tile["grid"]["affineTransform"]["translateX"] += shift_x
        tile["grid"]["affineTransform"]["translateY"] += shift_y
        tiles.append(tile)

    return tiles
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def getGeoTIFFbatch(
    manifest_dict: dict,
    full_outname: pathlib.Path,
    max_deep_level: Optional[int] = 5,
    method: Optional[str] = "getPixels",
) -> pathlib.Path:
    """
    Downloads a GeoTIFF image from Google Earth Engine using either the `getPixels` or `computePixels` method.
    If the requested area exceeds the size limit, the image is recursively split into smaller tiles until the
    download succeeds or the maximum recursion depth is reached.

    Args:
        manifest_dict (dict): A dictionary containing image metadata, including grid dimensions, affine transformations,
            and either an `assetId` or `expression` for the image source.
        full_outname (pathlib.Path): The full path where the downloaded GeoTIFF file will be saved.
        max_deep_level (Optional[int]): Maximum recursion depth for splitting large requests. Defaults to 5.
        method (Optional[str]): Method for retrieving image data. Can be 'getPixels' for asset-based requests or
            'computePixels' for expressions. Defaults to 'getPixels'.

    Returns:
        pathlib.Path: The path passed in as ``full_outname``. Note that when the
        request is split, the tiles are written into a sibling folder named after
        the file stem, not into ``full_outname`` itself.

    Raises:
        ValueError: If the method is not 'getPixels' or 'computePixels', if the
        download fails for a reason other than the size limit, or if the
        maximum recursion depth is reached.

    Example:
        >>> import ee
        >>> import pathlib
        >>> ee.Initialize()
        >>> manifest_dict = {
        ...     "assetId": "COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN",
        ...     "fileFormat": "GEO_TIFF",
        ...     "bandIds": ["B4", "B3", "B2"],
        ...     "grid": {
        ...         "dimensions": {
        ...             "width": 512,
        ...             "height": 512
        ...         },
        ...         "affineTransform": {
        ...             "scaleX": 10,
        ...             "shearX": 0,
        ...             "translateX": 725260.108545126,
        ...             "scaleY": -10,
        ...             "shearY": 0,
        ...             "translateY": 4701550.38712196
        ...         },
        ...         "crsCode": "EPSG:32618"
        ...     }
        ... }
        >>> getGeoTIFFbatch(manifest_dict, pathlib.Path('output/sentinel_image.tif'))
        PosixPath('output/sentinel_image.tif')
    """

    # Check if the maximum recursion depth has been reached
    if max_deep_level == 0:
        raise ValueError("Max recursion depth reached.")

    try:
        # Get the image bytes
        if method == "getPixels":
            image_bytes: bytes = ee.data.getPixels(manifest_dict)
        elif method == "computePixels":
            image_bytes: bytes = ee.data.computePixels(manifest_dict)
        else:
            raise ValueError("Method must be either 'getPixels' or 'computePixels'")

        # Write the image bytes to a file
        with open(full_outname, "wb") as src:
            src.write(image_bytes)
    except Exception as e:
        # TODO: This is a workaround when the image is not found, as it is a message from the server
        # it is not possible to check the type of the exception
        if not check_not_found_error(str(e)):
            raise ValueError(
                f"Error downloading the GeoTIFF file from Earth Engine: {e}"
            )

        # Size-limit error: split the request in four and recurse.
        # Create the output directory if it doesn't exist
        child_folder: pathlib.Path = full_outname.parent / full_outname.stem
        pathlib.Path(child_folder).mkdir(parents=True, exist_ok=True)

        # Split the manifest into four smaller manifests
        manifest_dicts = quadsplit_manifest(manifest_dict)

        for idx, manifest_dict_batch in enumerate(manifest_dicts):
            # Recursively download the image
            getGeoTIFFbatch(
                full_outname=child_folder / ("%s__%02d.tif" % (full_outname.stem, idx)),
                manifest_dict=manifest_dict_batch,
                max_deep_level=max_deep_level - 1,
                method=method,
            )

    return full_outname
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def getGeoTIFF(
    manifest_dict: dict, full_outname: pathlib.Path, max_deep_level: Optional[int] = 5
) -> pathlib.Path:
    """
    Retrieves an image from Earth Engine using the appropriate method based on the manifest type.

    This function downloads a GeoTIFF image from Google Earth Engine (GEE). Depending on the content of
    the provided manifest (`manifest_dict`), the function will either use the `getPixels` method (for
    asset-based requests) or the `computePixels` method (for expressions). If the requested area exceeds
    the size limit, the image will be recursively split into smaller tiles until the download succeeds or
    the maximum recursion depth is reached.

    Args:
        manifest_dict (dict): A dictionary containing the image metadata. This should include either:
            - `assetId`: The identifier of a GEE asset (e.g., satellite imagery).
            - `expression`: A serialized string representing a GEE image expression (e.g., an image computation).
            Additionally, the manifest should include grid information such as the image dimensions and affine transformations.
            NOTE: when `expression` is a string it is decoded in place, mutating
            the caller's dictionary.

        full_outname (pathlib.Path): The full path where the downloaded GeoTIFF file will be saved.

        max_deep_level (Optional[int]): The maximum recursion depth for splitting large requests into smaller tiles if needed.
            Defaults to 5.

    Returns:
        pathlib.Path: The path to the saved GeoTIFF image (as returned by
        :func:`getGeoTIFFbatch`).

    Raises:
        ValueError: If the manifest does not contain either an `assetId` or `expression`, or if there is an error during download.

    Example 1: Downloading an image using an `assetId`:
        >>> import ee
        >>> import pathlib
        >>> ee.Initialize()
        >>> manifest_dict = {
        ...     "assetId": "COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN",
        ...     "fileFormat": "GEO_TIFF",
        ...     "bandIds": ["B4", "B3", "B2"],
        ...     "grid": {
        ...         "dimensions": {"width": 512, "height": 512},
        ...         "affineTransform": {
        ...             "scaleX": 10,
        ...             "shearX": 0,
        ...             "translateX": 725260.108545126,
        ...             "scaleY": -10,
        ...             "shearY": 0,
        ...             "translateY": 4701550.38712196
        ...         },
        ...         "crsCode": "EPSG:32618"
        ...     }
        ... }
        >>> getGeoTIFF(manifest_dict, pathlib.Path('output/sentinel_image.tif'))
        PosixPath('output/sentinel_image.tif')

    Example 2: Downloading an image using an `expression`:
        >>> image = ee.Image("COPERNICUS/S2_HARMONIZED/20160816T153912_20160816T154443_T18TYN") \
        ...     .divide(10_000) \
        ...     .select(["B4", "B3", "B2"])
        >>> expression = image.serialize()
        >>> manifest_dict = {
        ...     "expression": expression,
        ...     "fileFormat": "GEO_TIFF",
        ...     "grid": {
        ...         "dimensions": {"width": 512, "height": 512},
        ...         "affineTransform": {
        ...             "scaleX": 10,
        ...             "shearX": 0,
        ...             "translateX": 725260.108545126,
        ...             "scaleY": -10,
        ...             "shearY": 0,
        ...             "translateY": 4701550.38712196
        ...         },
        ...         "crsCode": "EPSG:32618"
        ...     }
        ... }
        >>> getGeoTIFF(manifest_dict, pathlib.Path('output/expression_image.tif'))
        PosixPath('output/expression_image.tif')
    """
    if "assetId" in manifest_dict:
        return getGeoTIFFbatch(
            manifest_dict=manifest_dict,
            full_outname=full_outname,
            max_deep_level=max_deep_level,
            method="getPixels",
        )
    elif "expression" in manifest_dict:
        if isinstance(
            manifest_dict["expression"], str
        ):  # Decode only if the expression is still a string.
            # From a string to a ee.Image object
            manifest_dict["expression"] = ee.deserializer.decode(
                json.loads(manifest_dict["expression"])
            )

        return getGeoTIFFbatch(
            manifest_dict=manifest_dict,
            full_outname=full_outname,
            max_deep_level=max_deep_level,
            method="computePixels",
        )
    else:
        raise ValueError("Manifest does not contain 'assetId' or 'expression'")
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def getcube(
    request: RequestSet,
    output_path: str | pathlib.Path,
    nworkers: Optional[int] = None,
    max_deep_level: Optional[int] = 5,
) -> list[pathlib.Path]:
    """
    Downloads multiple GeoTIFF images in parallel from Google Earth Engine (GEE) based on the provided request set.

    Args:
        request (RequestSet): A collection of image requests containing metadata and processing parameters.
            Its ``_dataframe`` must already be populated — TODO confirm which
            call is responsible for that (not visible in this module).
        output_path (Union[str, pathlib.Path]): Directory where the downloaded images will be saved.
        nworkers (Optional[int], default=None): Number of parallel threads. If None,
            ``ThreadPoolExecutor`` falls back to its own default worker count
            (it does NOT run sequentially).
        max_deep_level (Optional[int], default=5): Maximum recursion depth for image subdivision if exceeding GEE limits.

    Returns:
        List[pathlib.Path]: List of paths to the downloaded GeoTIFF files, in
        completion order (not request order). Failed downloads are logged to
        stdout and omitted from the result.

    Example:
        >>> import ee, cubexpress
        >>> ee.Initialize()
        >>> point = ee.Geometry.Point([-97.59, 33.37])
        >>> collection = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") \
        ...     .filterBounds(point) \
        ...     .filterDate('2024-01-01', '2024-01-31')
        >>> image_ids = collection.aggregate_array('system:id').getInfo()
        >>> geotransform = cubexpress.lonlat2rt(lon=-97.59, lat=33.37, edge_size=128, scale=10)
        >>> requests = [cubexpress.Request(id=f"s2_{i}", raster_transform=geotransform, bands=["B4", "B3", "B2"], image=ee.Image(img_id)) for i, img_id in enumerate(image_ids)]
        >>> cube_requests = cubexpress.RequestSet(requestset=requests)
        >>> cubexpress.getcube(request=cube_requests, nworkers=4, output_path="output", max_deep_level=5)
        [PosixPath('output/s2_0.tif'), PosixPath('output/s2_1.tif'), ...]
    """

    # Check that _dataframe exists and is not empty
    if request._dataframe is None or request._dataframe.empty:
        raise ValueError(
            "The request's _dataframe is None or empty. "
            "There are no valid requests to process."
        )

    # **Revalidate** the DataFrame structure, in case the user manipulated it.
    request._validate_dataframe_schema()

    # Get the table
    table: pd.DataFrame = request._dataframe

    # Create the output directory if it doesn't exist
    output_path = pathlib.Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    results = []
    # Threads (not processes): downloads are I/O-bound and release the GIL.
    with ThreadPoolExecutor(max_workers=nworkers) as executor:
        # Map each future back to its row so failures can be attributed.
        futures = {
            executor.submit(
                getGeoTIFF, row.manifest, output_path / row.outname, max_deep_level
            ): row
            for _, row in table.iterrows()
        }
        for future in concurrent.futures.as_completed(futures):
            try:
                result = future.result()
                if result:
                    results.append(result)
            except Exception as e:
                # TODO add this into the log
                print(f"Error processing {futures[future].outname}: {e}")

    return results
|
cubexpress/geotyping.py
ADDED
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
from __future__ import annotations

from concurrent.futures import ProcessPoolExecutor
from functools import lru_cache
from typing import Any, Final, List, Set, TypeAlias

import ee
import pandas as pd
from pydantic import BaseModel, field_validator, model_validator
from pyproj import CRS, Transformer
from typing_extensions import TypedDict
|
|
11
|
+
|
|
12
|
+
# Type definitions
# Numeric scalar accepted anywhere the geotransform stores a value.
NumberType: TypeAlias = int | float

# Constants for required keys in the geotransform: the six affine-transform
# parameters every geotransform dictionary must carry (validated in
# RasterTransform.validate_geotransform).
REQUIRED_KEYS: Final[Set[str]] = {
    "scaleX",
    "shearX",
    "translateX",
    "scaleY",
    "shearY",
    "translateY",
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@lru_cache(maxsize=64)
def get_transformer(source_crs_wkt: str) -> Transformer:
    """
    Return a transformer from the given source CRS (WKT) to WGS84 (EPSG:4326).

    Results are memoized per WKT string, so repeated conversions that share a
    source CRS reuse a single ``Transformer``. (The original docstring claimed
    the result was cached, but no cache was actually implemented; building a
    pyproj transformer is comparatively expensive, so it is worth reusing.)

    Args:
        source_crs_wkt: Source CRS serialized as a WKT string. Using the WKT
            string (hashable) rather than a CRS object makes the cache key valid.

    Returns:
        Transformer: pyproj transformer with ``always_xy=True`` so coordinates
        are always interpreted/returned in (x, y) = (lon, lat) order.
    """
    return Transformer.from_crs(
        CRS.from_wkt(source_crs_wkt),
        CRS.from_epsg(4326),
        always_xy=True,  # Ensures consistent x,y order
    )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def rt2lonlat(raster: "RasterTransform") -> tuple[float, float, float, float]:
    """
    Compute the raster's center point in WGS84 and in its native CRS.

    Args:
        raster: RasterTransform instance with geospatial metadata.

    Returns:
        Tuple of (longitude, latitude, x, y): the center in WGS84 (lon, lat)
        followed by the same point in the raster's native CRS (x, y). When the
        native CRS is already EPSG:4326, (lon, lat) equals (x, y).

    Note:
        Bug fix: the EPSG:4326 fast path previously returned a 2-tuple while
        the projected path returned 4 values. Callers such as
        ``RequestSet.create_manifests`` unpack four values
        (``lon, lat, x, y = zip(*points)``), so a 4326 raster would crash.
        Both paths now return 4 values, and the return annotation (previously
        ``tuple[float, float]``) matches.
    """
    # Calculate pixel coordinates of raster center
    col_center = (raster.width - 1) / 2.0
    row_center = (raster.height - 1) / 2.0

    # Extract geotransform parameters as local variables for faster access
    gt = raster.geotransform
    tx = gt["translateX"]
    sx = gt["scaleX"]
    shx = gt["shearX"]
    ty = gt["translateY"]
    shy = gt["shearY"]
    sy = gt["scaleY"]

    # Apply affine transformation (pixel -> native CRS coordinates)
    x = tx + sx * col_center + shx * row_center
    y = ty + shy * col_center + sy * row_center

    # Check if transformation is needed
    source_crs = CRS.from_user_input(raster.crs)
    target_crs = CRS.from_epsg(4326)

    if source_crs == target_crs:
        # Already geographic: lon/lat coincide with the native coordinates.
        return x, y, x, y

    # Perform the transformation
    transformer = get_transformer(source_crs.to_wkt())
    lon, lat = transformer.transform(x, y)

    return lon, lat, x, y
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class GeotransformDict(TypedDict):
    """
    Type definition for a geotransform dictionary containing spatial transformation parameters.

    Mirrors the six parameters of a GDAL-style affine transform; the same keys
    are enforced at runtime by ``REQUIRED_KEYS`` in
    ``RasterTransform.validate_geotransform``.

    Attributes:
        scaleX (NumberType): The scaling factor (pixel size) in the X direction.
        shearX (NumberType): The shear factor in the X direction.
        translateX (NumberType): The translation (origin) in the X direction.
        scaleY (NumberType): The scaling factor in the Y direction (typically
            negative for north-up rasters).
        shearY (NumberType): The shear factor in the Y direction.
        translateY (NumberType): The translation (origin) in the Y direction.
    """

    scaleX: NumberType
    shearX: NumberType
    translateX: NumberType
    scaleY: NumberType
    shearY: NumberType
    translateY: NumberType
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class RasterTransform(BaseModel):
    """
    Represents a single geospatial metadata entry with CRS and transformation information.

    Attributes:
        crs (str): The Coordinate Reference System string (EPSG code or WKT).
        geotransform (GeotransformDict): A dictionary containing spatial transformation parameters.
        width (int): Raster width in pixels.
        height (int): Raster height in pixels.

    Example:
        >>> metadata = RasterTransform(
        ...     crs="EPSG:4326",
        ...     geotransform={
        ...         'scaleX': 1.0, 'shearX': 0, 'translateX': 100.0,
        ...         'scaleY': 1.0, 'shearY': 0, 'translateY': 200.0
        ...     },
        ...     width=1000,
        ...     height=1000
        ... )
        >>> print(metadata)  # tabular summary via __str__
    """

    crs: str
    geotransform: GeotransformDict
    width: int
    height: int

    @model_validator(mode="before")
    def validate_geotransform(cls, values):
        """
        Validates the geotransform dictionary to ensure it has the required keys
        and that all values are numeric and non-zero where applicable.

        Args:
            values (dict): The input values being validated.

        Returns:
            dict: The validated values.

        Raises:
            ValueError: If validation fails for missing keys, invalid types, or zero scale values.
        """
        geotransform = values.get("geotransform")

        if not isinstance(geotransform, dict):
            raise ValueError(
                f"Expected geotransform to be a dictionary, got {type(geotransform)}"
            )

        missing_keys = REQUIRED_KEYS - set(geotransform.keys())
        if missing_keys:
            raise ValueError(f"Missing required keys: {missing_keys}")

        extra_keys = set(geotransform.keys()) - REQUIRED_KEYS
        if extra_keys:
            raise ValueError(f"Unexpected keys found: {extra_keys}")

        for key in REQUIRED_KEYS:
            if not isinstance(geotransform[key], (int, float)):
                raise ValueError(f"Value for '{key}' must be numeric (int or float)")

        # Truthiness check: rejects scaleX/scaleY of 0 (and 0.0).
        if not (geotransform["scaleX"] and geotransform["scaleY"]):
            raise ValueError("Scale values cannot be zero")

        return values

    @field_validator("width", "height")
    @classmethod
    def validate_positive(cls, value: int, field) -> int:
        """
        Validates that width and height are positive integers.

        Args:
            value (int): The value of width or height to validate.
            field: The validation info for the field being checked
                (provides ``field_name``).

        Returns:
            int: The validated value.

        Raises:
            ValueError: If the value is not positive.
        """
        if value <= 0:
            raise ValueError(
                f"{field.field_name} must be positive and greater than zero, but got {value}"
            )
        return value

    def __str__(self) -> str:
        """
        Provides a string representation of the RasterTransform instance with a table
        format for the geotransform parameters.

        Returns:
            str: A formatted string showing the raster metadata and geotransform.
        """
        geotransform_data = {
            "Parameter": [
                "scaleX",
                "shearX",
                "translateX",
                "scaleY",
                "shearY",
                "translateY",
            ],
            "Value": [
                self.geotransform["scaleX"],
                self.geotransform["shearX"],
                self.geotransform["translateX"],
                self.geotransform["scaleY"],
                self.geotransform["shearY"],
                self.geotransform["translateY"],
            ],
        }
        geotransform_df = pd.DataFrame(geotransform_data)
        return f"RasterTransform(crs={self.crs}, width={self.width}, height={self.height})\n\nGeotransform:\n{geotransform_df}"
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class Request(BaseModel):
    """
    A single download request: an Earth Engine image plus the raster grid and
    band selection used to export it.

    Attributes:
        id (str): Unique identifier; used to derive the output filename.
        raster_transform (RasterTransform): Target grid (CRS, geotransform, size).
        image (Any): Either an ``ee.Image`` (serialized on validation), an
            already-serialized expression string (JSON), or an Earth Engine
            asset id string.
        bands (List[str]): Band names to request.
    """

    id: str
    raster_transform: RasterTransform
    image: Any
    bands: List[str]
    # Manifest key under which `image` is sent: "expression" for serialized
    # ee.Image payloads, "assetId" for plain asset ids. Set by validate_image.
    # (Annotation fixed: was `str = None`, which lied about the default.)
    _expression_key: str | None = None

    @model_validator(mode="after")
    def validate_image(self):
        """Normalize `image` and record which manifest key it belongs under."""
        if isinstance(self.image, ee.Image):
            # Serialize so the request carries a plain string payload.
            self.image = self.image.serialize()
            self._expression_key = "expression"
        # to avoid reading serialization of an ee.Image as str in RequestSet
        elif isinstance(self.image, str) and self.image.strip().startswith("{"):
            self._expression_key = "expression"
        else:
            # Anything else is treated as an Earth Engine asset id.
            # (Removed the no-op `self.image = self.image` from the original.)
            self._expression_key = "assetId"

        return self
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class RequestSet(BaseModel):
    """
    Container for multiple Request instances with bulk validation capabilities.

    Attributes:
        requestset (List[Request]): The individual image download requests.

    Example:
        >>> request_set = RequestSet(requestset=[request1, request2])
        >>> df = request_set.create_manifests()
    """

    requestset: List[Request]
    # Cached manifest table consumed by `getcube`. Presumably populated from
    # create_manifests() somewhere outside this chunk — TODO confirm.
    _dataframe: pd.DataFrame | None = None
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
    def create_manifests(self) -> pd.DataFrame:
        """
        Builds a manifest table (one row per request) as a pandas DataFrame.

        Each row carries the request id, the raster center in WGS84 (lon/lat)
        and in the native CRS (x/y), grid metadata, the ready-to-send Earth
        Engine manifest dict, and the output filename.

        Returns:
            pd.DataFrame: A DataFrame containing the metadata for all entries.

        NOTE(review): assumes ``rt2lonlat`` returns 4 values (lon, lat, x, y)
        for every request; also, an empty ``requestset`` makes the 4-way
        unpack of ``zip(*points)`` raise ValueError — confirm callers guard
        against that.

        Example:
            >>> df = request_set.create_manifests()
            >>> print(df)
        """
        # Use ProcessPoolExecutor for CPU-bound tasks to convert raster transforms to lon/lat
        with ProcessPoolExecutor(max_workers=None) as executor:
            # Submit all tasks to the executor
            futures = [
                executor.submit(rt2lonlat, rt.raster_transform)
                for rt in self.requestset
            ]

            # Collect results as they complete
            points = [future.result() for future in futures]
            lon, lat, x, y = zip(*points)

        return pd.DataFrame(
            [
                {
                    "id": meta.id,
                    "lon": lon[index],
                    "lat": lat[index],
                    "x": x[index],
                    "y": y[index],
                    "crs": meta.raster_transform.crs,
                    "width": meta.raster_transform.width,
                    "height": meta.raster_transform.height,
                    "geotransform": meta.raster_transform.geotransform,
                    "scale_x": meta.raster_transform.geotransform["scaleX"],
                    "scale_y": meta.raster_transform.geotransform["scaleY"],
                    # Manifest in the shape ee.data.getPixels/computePixels expect;
                    # _expression_key selects "assetId" vs "expression".
                    "manifest": {
                        meta._expression_key: meta.image,
                        "fileFormat": "GEO_TIFF",
                        "bandIds": meta.bands,
                        "grid": {
                            "dimensions": {
                                "width": meta.raster_transform.width,
                                "height": meta.raster_transform.height,
                            },
                            "affineTransform": meta.raster_transform.geotransform,
                            "crsCode": meta.raster_transform.crs,
                        },
                    },
                    "outname": f"{meta.id}.tif",
                }
                for index, meta in enumerate(self.requestset)
            ]
        )
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _validate_dataframe_schema(self) -> None:
|
|
317
|
+
"""
|
|
318
|
+
Checks that the `_dataframe` contains the required columns and that each column
|
|
319
|
+
has the expected data type. Also verifies that the `manifest` field has the
|
|
320
|
+
necessary minimum structure.
|
|
321
|
+
"""
|
|
322
|
+
|
|
323
|
+
# A) Required columns and expected data types
|
|
324
|
+
required_columns = {
|
|
325
|
+
"id": str,
|
|
326
|
+
"lon": float,
|
|
327
|
+
"lat": float,
|
|
328
|
+
"x": float,
|
|
329
|
+
"y": float,
|
|
330
|
+
"crs": str,
|
|
331
|
+
"width": int,
|
|
332
|
+
"height": int,
|
|
333
|
+
"geotransform": dict,
|
|
334
|
+
"scale_x": (int, float),
|
|
335
|
+
"scale_y": (int, float),
|
|
336
|
+
"manifest": dict,
|
|
337
|
+
"outname": str,
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
df_cols = set(self._dataframe.columns)
|
|
341
|
+
required_cols = set(required_columns.keys())
|
|
342
|
+
|
|
343
|
+
# 1. Check for missing columns
|
|
344
|
+
missing_cols = required_cols - df_cols
|
|
345
|
+
if missing_cols:
|
|
346
|
+
raise ValueError(f"Missing required columns in dataframe: {missing_cols}")
|
|
347
|
+
|
|
348
|
+
# 2. (Optional) Check for extra columns
|
|
349
|
+
# extra_cols = df_cols - required_cols
|
|
350
|
+
# if extra_cols:
|
|
351
|
+
# raise ValueError(f"Unexpected extra columns in dataframe: {extra_cols}")
|
|
352
|
+
|
|
353
|
+
# 3. Verify data types (basic check)
|
|
354
|
+
for col_name, expected_type in required_columns.items():
|
|
355
|
+
for i, value in enumerate(self._dataframe[col_name]):
|
|
356
|
+
if not isinstance(value, expected_type):
|
|
357
|
+
# For cases like (int, float), you can check with isinstance(value, (int, float))
|
|
358
|
+
# or directly use the tuple in `expected_type`
|
|
359
|
+
if isinstance(expected_type, tuple):
|
|
360
|
+
if not any(isinstance(value, t) for t in expected_type):
|
|
361
|
+
raise ValueError(
|
|
362
|
+
f"Column '{col_name}' has an invalid type in row {i}. "
|
|
363
|
+
f"Expected one of {expected_type}, got {type(value)}"
|
|
364
|
+
)
|
|
365
|
+
else:
|
|
366
|
+
raise ValueError(
|
|
367
|
+
f"Column '{col_name}' has an invalid type in row {i}. "
|
|
368
|
+
f"Expected {expected_type}, got {type(value)}"
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
# B) Validation of the `manifest` column structure
|
|
372
|
+
# - Must contain at least 'assetId' or 'expression'
|
|
373
|
+
# - Must contain 'grid' with the minimum required sub-keys
|
|
374
|
+
# - Example:
|
|
375
|
+
# {
|
|
376
|
+
# "fileFormat": "GEO_TIFF",
|
|
377
|
+
# "bandIds": [...],
|
|
378
|
+
# "grid": {
|
|
379
|
+
# "dimensions": {"width": ..., "height": ...},
|
|
380
|
+
# "affineTransform": {...},
|
|
381
|
+
# "crsCode": ...
|
|
382
|
+
# },
|
|
383
|
+
# // Either "assetId" or "expression" must be here
|
|
384
|
+
# }
|
|
385
|
+
for i, row in self._dataframe.iterrows():
|
|
386
|
+
manifest = row["manifest"]
|
|
387
|
+
|
|
388
|
+
# Main required keys
|
|
389
|
+
for key in ["fileFormat", "bandIds", "grid"]:
|
|
390
|
+
if key not in manifest:
|
|
391
|
+
raise ValueError(
|
|
392
|
+
f"Missing key '{key}' in 'manifest' for row index {i}"
|
|
393
|
+
)
|
|
394
|
+
|
|
395
|
+
# At least one of 'assetId' or 'expression'
|
|
396
|
+
if not any(k in manifest for k in ["assetId", "expression"]):
|
|
397
|
+
raise ValueError(
|
|
398
|
+
f"Manifest in row {i} does not contain 'assetId' or 'expression'"
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
# Basic validation of 'grid'
|
|
402
|
+
grid = manifest["grid"]
|
|
403
|
+
for subkey in ["dimensions", "affineTransform", "crsCode"]:
|
|
404
|
+
if subkey not in grid:
|
|
405
|
+
raise ValueError(
|
|
406
|
+
f"Missing key '{subkey}' in 'manifest.grid' for row index {i}"
|
|
407
|
+
)
|
|
408
|
+
|
|
409
|
+
# Basic validation of 'dimensions'
|
|
410
|
+
dims = grid["dimensions"]
|
|
411
|
+
for dim_key in ["width", "height"]:
|
|
412
|
+
if dim_key not in dims:
|
|
413
|
+
raise ValueError(
|
|
414
|
+
f"Missing '{dim_key}' in 'manifest.grid.dimensions' for row {i}"
|
|
415
|
+
)
|
|
416
|
+
if not isinstance(dims[dim_key], int) or dims[dim_key] <= 0:
|
|
417
|
+
raise ValueError(
|
|
418
|
+
f"'{dim_key}' in 'manifest.grid.dimensions' must be a positive integer. "
|
|
419
|
+
f"Row {i} has value {dims[dim_key]}"
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
# Basic validation of 'affineTransform'
|
|
423
|
+
aff = grid["affineTransform"]
|
|
424
|
+
for a_key in ["scaleX", "shearX", "translateX", "scaleY", "shearY", "translateY"]:
|
|
425
|
+
if a_key not in aff:
|
|
426
|
+
raise ValueError(
|
|
427
|
+
f"Missing '{a_key}' in 'manifest.grid.affineTransform' for row {i}"
|
|
428
|
+
)
|
|
429
|
+
if not isinstance(aff[a_key], (int, float)):
|
|
430
|
+
raise ValueError(
|
|
431
|
+
f"Value for '{a_key}' in 'manifest.grid.affineTransform' must be numeric. "
|
|
432
|
+
f"Row {i} has {type(aff[a_key])}."
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
@model_validator(mode="after")
def validate_metadata(self) -> RequestSet:
    """
    Run post-construction consistency checks and build the request table.

    Every CRS string must be parseable and every request id must be unique;
    afterwards the manifest DataFrame is created and its schema validated.

    Returns:
        RequestSet: The validated instance.

    Raises:
        ValueError: If any CRS is invalid or the ids are not unique.
    """
    # 1. Each distinct CRS string must parse. The set comprehension already
    #    de-duplicates, so every member is checked exactly once.
    for crs_code in {req.raster_transform.crs for req in self.requestset}:
        try:
            CRS.from_string(crs_code)
        except Exception as err:
            raise ValueError(f"Invalid CRS format: {crs_code}") from err

    # 2. Ids must be unique across the whole set.
    unique_ids = {req.id for req in self.requestset}
    if len(unique_ids) != len(self.requestset):
        raise ValueError("All entries must have unique IDs")

    # 3. Build the manifest DataFrame, then check its structure.
    self._dataframe = self.create_manifests()
    self._validate_dataframe_schema()

    return self
477
|
+
def __repr__(self) -> str:
|
|
478
|
+
"""
|
|
479
|
+
Provides a string representation of the metadata set including a table of all entries.
|
|
480
|
+
|
|
481
|
+
Returns:
|
|
482
|
+
str: A string representation of the entire RasterTransformSet.
|
|
483
|
+
"""
|
|
484
|
+
num_entries = len(self.requestset)
|
|
485
|
+
return f"RasterTransformSet({num_entries} entries)"
|
|
486
|
+
|
|
487
|
+
def __str__(self):
    # Delegate to the parent class's __repr__ via super(), which starts the
    # MRO lookup *after* this class — so the custom __repr__ defined here is
    # deliberately bypassed for str() conversions.
    return super().__repr__()
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, AndesDataCube
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: cubexpress
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python package for efficient processing of cubic earth observation (EO) data
|
|
5
|
+
Home-page: https://github.com/andesdatacube/cubexpress/
|
|
6
|
+
License: MIT
|
|
7
|
+
Author: Julio Contreras
|
|
8
|
+
Author-email: contrerasnetk@gmail.com
|
|
9
|
+
Requires-Python: >=3.9,<4.0
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Dist: numpy (>=1.25.2)
|
|
17
|
+
Requires-Dist: pandas (>=2.0.3)
|
|
18
|
+
Requires-Dist: utm (>=0.8.0,<0.9.0)
|
|
19
|
+
Project-URL: Documentation, https://andesdatacube.github.io/cubexpress/
|
|
20
|
+
Project-URL: Repository, https://github.com/andesdatacube/cubexpress/
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
<h1></h1>
|
|
24
|
+
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src="./docs/logo_cubexpress.png" width="39%">
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
<p align="center">
|
|
30
|
+
<em>A Python package for efficient processing of cubic earth observation (EO) data</em> 🚀
|
|
31
|
+
</p>
|
|
32
|
+
|
|
33
|
+
<p align="center">
|
|
34
|
+
<a href='https://pypi.python.org/pypi/cubexpress'>
|
|
35
|
+
<img src='https://img.shields.io/pypi/v/cubexpress.svg' alt='PyPI' />
|
|
36
|
+
</a>
|
|
37
|
+
<a href="https://opensource.org/licenses/MIT" target="_blank">
|
|
38
|
+
<img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
|
|
39
|
+
</a>
|
|
40
|
+
<a href="https://github.com/psf/black" target="_blank">
|
|
41
|
+
<img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Black">
|
|
42
|
+
</a>
|
|
43
|
+
<a href="https://pycqa.github.io/isort/" target="_blank">
|
|
44
|
+
<img src="https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336" alt="isort">
|
|
45
|
+
</a>
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
**GitHub**: [https://github.com/andesdatacube/cubexpress/](https://github.com/andesdatacube/cubexpress/) 🌐
|
|
51
|
+
|
|
52
|
+
**PyPI**: [https://pypi.org/project/cubexpress/](https://pypi.org/project/cubexpress/) 🛠️
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## **Overview**
|
|
57
|
+
|
|
58
|
+
**CubeXpress** is a Python package designed to **simplify and accelerate** the process of working with Google Earth Engine (GEE) data cubes. With features like multi-threaded downloads, automatic subdivision of large requests, and direct pixel-level computations on GEE, **CubeXpress** helps you handle massive datasets with ease.
|
|
59
|
+
|
|
60
|
+
## **Key Features**
|
|
61
|
+
- **Fast Image and Collection Downloads**
|
|
62
|
+
Retrieve single images or entire collections at once, taking advantage of multi-threaded requests.
|
|
63
|
+
- **Automatic Tiling**
|
|
64
|
+
Large images are split ("quadsplit") into smaller sub-tiles, preventing errors with GEE’s size limits.
|
|
65
|
+
- **Direct Pixel Computations**
|
|
66
|
+
Perform computations (e.g., band math) directly on GEE, then fetch results in a single step.
|
|
67
|
+
- **Scalable & Efficient**
|
|
68
|
+
Optimized memory usage and parallelism let you handle complex tasks in big data environments.
|
|
69
|
+
|
|
70
|
+
## **Installation**
|
|
71
|
+
Install the latest version from PyPI:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install cubexpress
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
> **Note**: You need a valid Google Earth Engine account and `earthengine-api` installed (`pip install earthengine-api`). Also run `ee.Initialize()` before using CubeXpress.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## **Basic Usage**
|
|
82
|
+
|
|
83
|
+
### **Download a single `ee.Image`**
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import ee
|
|
87
|
+
import cubexpress
|
|
88
|
+
|
|
89
|
+
# Initialize Earth Engine
|
|
90
|
+
ee.Initialize(project="your-project-id")
|
|
91
|
+
|
|
92
|
+
# Create a raster transform
|
|
93
|
+
geotransform = cubexpress.lonlat2rt(
|
|
94
|
+
lon=-76.5,
|
|
95
|
+
lat=-9.5,
|
|
96
|
+
edge_size=128, # Width=Height=128 pixels
|
|
97
|
+
scale=90 # 90m resolution
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Define a single Request
|
|
101
|
+
request = cubexpress.Request(
|
|
102
|
+
id="dem_test",
|
|
103
|
+
raster_transform=geotransform,
|
|
104
|
+
bands=["elevation"],
|
|
105
|
+
image="NASA/NASADEM_HGT/001"  # Note: you can wrap with ee.Image("NASA/NASADEM_HGT/001").divide(10000) if needed
)

|
107
|
+
# Build the RequestSet
|
|
108
|
+
cube_requests = cubexpress.RequestSet(requestset=[request])
|
|
109
|
+
|
|
110
|
+
# Download with multi-threading
|
|
111
|
+
cubexpress.getcube(
|
|
112
|
+
request=cube_requests,
|
|
113
|
+
output_path="output_dem",
|
|
114
|
+
nworkers=4,
|
|
115
|
+
max_deep_level=5
|
|
116
|
+
)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
This will create a GeoTIFF named `dem_test.tif` in the `output_dem` folder, containing the elevation band.
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
### **Download pixel values from an ee.ImageCollection**
|
|
125
|
+
|
|
126
|
+
You can fetch multiple images by constructing a `RequestSet` with several `Request` objects. For example, filter Sentinel-2 images near a point:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import ee
|
|
130
|
+
import cubexpress
|
|
131
|
+
|
|
132
|
+
ee.Initialize(project="your-project-id")
|
|
133
|
+
|
|
134
|
+
# Filter a Sentinel-2 collection
|
|
135
|
+
point = ee.Geometry.Point([-97.59, 33.37])
|
|
136
|
+
collection = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") \
|
|
137
|
+
.filterBounds(point) \
|
|
138
|
+
.filterDate('2024-01-01', '2024-01-31')
|
|
139
|
+
|
|
140
|
+
# Extract image IDs
|
|
141
|
+
image_ids = collection.aggregate_array('system:id').getInfo()
|
|
142
|
+
|
|
143
|
+
# Set the geotransform
|
|
144
|
+
geotransform = cubexpress.lonlat2rt(
|
|
145
|
+
lon=-97.59,
|
|
146
|
+
lat=33.37,
|
|
147
|
+
edge_size=512,
|
|
148
|
+
scale=10
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# Build multiple requests
|
|
152
|
+
requests = [
|
|
153
|
+
cubexpress.Request(
|
|
154
|
+
id=f"s2test_{i}",
|
|
155
|
+
raster_transform=geotransform,
|
|
156
|
+
bands=["B4", "B3", "B2"],
|
|
157
|
+
image=image_id # Note: you can wrap with ee.Image(image_id).divide(10000) if needed
|
|
158
|
+
)
|
|
159
|
+
for i, image_id in enumerate(image_ids)
|
|
160
|
+
]
|
|
161
|
+
|
|
162
|
+
# Create the RequestSet
|
|
163
|
+
cube_requests = cubexpress.RequestSet(requestset=requests)
|
|
164
|
+
|
|
165
|
+
# Download
|
|
166
|
+
cubexpress.getcube(
|
|
167
|
+
request=cube_requests,
|
|
168
|
+
output_path="output_sentinel",
|
|
169
|
+
nworkers=4,
|
|
170
|
+
max_deep_level=5
|
|
171
|
+
)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
### **Process and extract a pixel from an ee.Image**
|
|
177
|
+
If you provide an `ee.Image` with custom calculations (e.g., `.divide(10000)`, `.normalizedDifference(...)`), CubeXpress can run those on GEE, then download the result. For large results, it automatically splits the image into sub-tiles.
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
import ee
|
|
181
|
+
import cubexpress
|
|
182
|
+
|
|
183
|
+
ee.Initialize(project="your-project-id")
|
|
184
|
+
|
|
185
|
+
# Example: NDVI from Sentinel-2
|
|
186
|
+
image = ee.Image("COPERNICUS/S2_HARMONIZED/20170804T154911_20170804T155116_T18SUJ") \
|
|
187
|
+
.normalizedDifference(["B8", "B4"]) \
|
|
188
|
+
.rename("NDVI")
|
|
189
|
+
|
|
190
|
+
geotransform = cubexpress.lonlat2rt(
|
|
191
|
+
lon=-76.59,
|
|
192
|
+
lat=38.89,
|
|
193
|
+
edge_size=256,
|
|
194
|
+
scale=10
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
request = cubexpress.Request(
|
|
198
|
+
id="ndvi_test",
|
|
199
|
+
raster_transform=geotransform,
|
|
200
|
+
bands=["NDVI"],
|
|
201
|
+
image=image # custom expression
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
cube_requests = cubexpress.RequestSet(requestset=[request])
|
|
205
|
+
|
|
206
|
+
cubexpress.getcube(
|
|
207
|
+
request=cube_requests,
|
|
208
|
+
output_path="output_ndvi",
|
|
209
|
+
nworkers=2,
|
|
210
|
+
max_deep_level=5
|
|
211
|
+
)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## **Advanced Usage**
|
|
217
|
+
|
|
218
|
+
### **Same Set of Sentinel-2 Images for Multiple Points**
|
|
219
|
+
|
|
220
|
+
Below is an **advanced example** demonstrating how to work with **multiple points** and a **Sentinel-2** image collection in one script. We first create a global collection but then filter it on a point-by-point basis, extracting only the images that intersect each coordinate. Finally, we download them in parallel using **CubeXpress**.
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
import ee
|
|
225
|
+
import cubexpress
|
|
226
|
+
|
|
227
|
+
# Initialize Earth Engine with your project
|
|
228
|
+
ee.Initialize(project="your-project-id")
|
|
229
|
+
|
|
230
|
+
# Define multiple points (longitude, latitude)
|
|
231
|
+
points = [
|
|
232
|
+
(-97.64, 33.37),
|
|
233
|
+
(-97.59, 33.37)
|
|
234
|
+
]
|
|
235
|
+
|
|
236
|
+
# Start with a broad Sentinel-2 collection
|
|
237
|
+
collection = (
|
|
238
|
+
ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
|
|
239
|
+
.filterDate("2024-01-01", "2024-01-31")
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# Build a list of Request objects
|
|
243
|
+
requestset = []
|
|
244
|
+
for i, (lon, lat) in enumerate(points):
|
|
245
|
+
# Create a point geometry for the current coordinates
|
|
246
|
+
point_geom = ee.Geometry.Point([lon, lat])
|
|
247
|
+
collection_filtered = collection.filterBounds(point_geom)
|
|
248
|
+
|
|
249
|
+
# Convert the filtered collection into a list of asset IDs
|
|
250
|
+
image_ids = collection_filtered.aggregate_array("system:id").getInfo()
|
|
251
|
+
|
|
252
|
+
# Define a geotransform for this point
|
|
253
|
+
geotransform = cubexpress.lonlat2rt(
|
|
254
|
+
lon=lon,
|
|
255
|
+
lat=lat,
|
|
256
|
+
edge_size=512, # Adjust the image size in pixels
|
|
257
|
+
scale=10 # 10m resolution for Sentinel-2
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
# Create one Request per image found for this point
|
|
261
|
+
requestset.extend([
|
|
262
|
+
cubexpress.Request(
|
|
263
|
+
id=f"s2test_{i}_{idx}",
|
|
264
|
+
raster_transform=geotransform,
|
|
265
|
+
bands=["B4", "B3", "B2"],
|
|
266
|
+
image=image_id
|
|
267
|
+
)
|
|
268
|
+
for idx, image_id in enumerate(image_ids)
|
|
269
|
+
])
|
|
270
|
+
|
|
271
|
+
# Combine into a RequestSet
|
|
272
|
+
cube_requests = cubexpress.RequestSet(requestset=requestset)
|
|
273
|
+
|
|
274
|
+
# Download everything in parallel
|
|
275
|
+
results = cubexpress.getcube(
|
|
276
|
+
request=cube_requests,
|
|
277
|
+
nworkers=4,
|
|
278
|
+
output_path="images_s2",
|
|
279
|
+
max_deep_level=5
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
print("Downloaded files:", results)
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
**How it works**:
|
|
287
|
+
|
|
288
|
+
1. **Points:** We define multiple coordinates in `points`.
|
|
289
|
+
2. **Global collection:** We retrieve a broad Sentinel-2 collection covering the desired date range.
|
|
290
|
+
3. **Per-point filter:** For each point, we call `.filterBounds(...)` to get only images intersecting that location.
|
|
291
|
+
4. **Geotransform:** We create a local geotransform (`edge_size`, `scale`) defining the spatial extent and resolution around each point.
|
|
292
|
+
5. **Requests:** Each point-image pair becomes a `Request`, stored in a single list.
|
|
293
|
+
6. **Parallel download:** With `cubexpress.getcube()`, all requests are fetched simultaneously, automatically splitting large outputs into sub-tiles if needed (up to `max_deep_level`).
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
## **License**
|
|
298
|
+
This project is licensed under the [MIT License](https://opensource.org/licenses/MIT).
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
<p align="center">
|
|
303
|
+
Built with 🌎 and ❤️ by the <strong>CubeXpress</strong> team
|
|
304
|
+
</p>
|
|
305
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
cubexpress/__init__.py,sha256=1CF6kINn70mfS5HNzYyTf4UsOUPG0qzeetoJSDk0ALw,418
|
|
2
|
+
cubexpress/conversion.py,sha256=h77re8AtdVV_Jy3ugZeQ-e2I8DHSKoghiq70MXkzBaQ,2506
|
|
3
|
+
cubexpress/download.py,sha256=DX5DKPdKiuv1gHxs-5Q5ScZ06nvE-Pi1YGLSzQc2jrs,14315
|
|
4
|
+
cubexpress/geotyping.py,sha256=5JgsOfRfwQf-iBh902wKQ1AxEKw1HgFL2brzwkxO0Pg,17152
|
|
5
|
+
cubexpress-0.1.0.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
|
|
6
|
+
cubexpress-0.1.0.dist-info/METADATA,sha256=XfBIfpFP1quHSNr60Dn6R8EEpdq02XJWCepwhl7j7U0,9327
|
|
7
|
+
cubexpress-0.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
8
|
+
cubexpress-0.1.0.dist-info/RECORD,,
|