eotdl 2023.11.2.post5-py3-none-any.whl → 2023.11.3.post2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/__init__.py +1 -1
- eotdl/access/__init__.py +6 -3
- eotdl/access/airbus/__init__.py +5 -1
- eotdl/access/airbus/client.py +356 -338
- eotdl/access/airbus/parameters.py +19 -4
- eotdl/access/airbus/utils.py +26 -21
- eotdl/access/download.py +30 -14
- eotdl/access/search.py +17 -6
- eotdl/access/sentinelhub/__init__.py +5 -1
- eotdl/access/sentinelhub/client.py +57 -54
- eotdl/access/sentinelhub/evalscripts.py +38 -39
- eotdl/access/sentinelhub/parameters.py +43 -23
- eotdl/access/sentinelhub/utils.py +38 -28
- eotdl/auth/errors.py +2 -1
- eotdl/commands/auth.py +3 -3
- eotdl/curation/__init__.py +5 -1
- eotdl/curation/stac/__init__.py +5 -1
- eotdl/curation/stac/assets.py +55 -32
- eotdl/curation/stac/dataframe.py +20 -14
- eotdl/curation/stac/dataframe_bck.py +2 -2
- eotdl/curation/stac/dataframe_labeling.py +15 -12
- eotdl/curation/stac/extensions/__init__.py +6 -2
- eotdl/curation/stac/extensions/base.py +8 -4
- eotdl/curation/stac/extensions/dem.py +6 -3
- eotdl/curation/stac/extensions/eo.py +10 -6
- eotdl/curation/stac/extensions/label/__init__.py +5 -1
- eotdl/curation/stac/extensions/label/base.py +40 -26
- eotdl/curation/stac/extensions/label/image_name_labeler.py +64 -43
- eotdl/curation/stac/extensions/label/scaneo.py +59 -56
- eotdl/curation/stac/extensions/ml_dataset.py +154 -56
- eotdl/curation/stac/extensions/projection.py +11 -9
- eotdl/curation/stac/extensions/raster.py +22 -14
- eotdl/curation/stac/extensions/sar.py +12 -7
- eotdl/curation/stac/extent.py +67 -40
- eotdl/curation/stac/parsers.py +18 -10
- eotdl/curation/stac/stac.py +81 -62
- eotdl/datasets/__init__.py +1 -1
- eotdl/datasets/download.py +42 -55
- eotdl/datasets/ingest.py +68 -11
- eotdl/files/__init__.py +1 -1
- eotdl/files/ingest.py +3 -1
- eotdl/models/download.py +1 -1
- eotdl/repos/AuthAPIRepo.py +0 -1
- eotdl/repos/DatasetsAPIRepo.py +22 -146
- eotdl/repos/FilesAPIRepo.py +7 -92
- eotdl/repos/ModelsAPIRepo.py +0 -1
- eotdl/tools/__init__.py +5 -1
- eotdl/tools/geo_utils.py +78 -48
- eotdl/tools/metadata.py +13 -11
- eotdl/tools/paths.py +14 -14
- eotdl/tools/stac.py +36 -31
- eotdl/tools/time_utils.py +53 -26
- eotdl/tools/tools.py +84 -50
- {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/METADATA +5 -3
- eotdl-2023.11.3.post2.dist-info/RECORD +84 -0
- eotdl-2023.11.2.post5.dist-info/RECORD +0 -84
- {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/WHEEL +0 -0
- {eotdl-2023.11.2.post5.dist-info → eotdl-2023.11.3.post2.dist-info}/entry_points.txt +0 -0
eotdl/tools/geo_utils.py
CHANGED
```diff
@@ -1,9 +1,15 @@
+"""
+Geo Utils
+"""
+
+import tarfile
+from typing import Union
+from statistics import mean
+
 import geopandas as gpd
 import rasterio
 import rasterio.warp
-import tarfile
 
-from typing import Union
 from shapely import geometry
 from shapely.geometry import box, Polygon, shape
 from pyproj import Transformer
@@ -12,9 +18,12 @@ from pandas import isna
 
 
 def is_bounding_box(bbox: list) -> bool:
+    """
+    Check if the given bounding box is a bounding box and is valid
+    """
     if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
         return False
-
+
     for value in bbox:
         if not isinstance(value, (int, float)):
             return False
@@ -27,13 +36,19 @@ def is_bounding_box(bbox: list) -> bool:
 
 
 def compute_image_size(bounding_box, parameters):
+    """
+    Compute the image size from the bounding box and the resolution
+    """
     bbox = BBox(bbox=bounding_box, crs=CRS.WGS84)
     bbox_size = bbox_to_dimensions(bbox, resolution=parameters.RESOLUTION)
 
-    return bbox, bbox_size
+    return bbox, bbox_size
 
 
 def get_image_bbox(raster: Union[tarfile.ExFileObject, str]):
+    """
+    Get the bounding box of a raster
+    """
     with rasterio.open(raster) as src:
         bounds = src.bounds
         dst_crs = "EPSG:4326"
@@ -45,6 +60,9 @@ def get_image_bbox(raster: Union[tarfile.ExFileObject, str]):
 
 
 def get_image_resolution(raster: Union[tarfile.ExFileObject, str]):
+    """
+    Get the resolution of a raster
+    """
     with rasterio.open(raster) as src:
         resolution = src.res
     return resolution
@@ -52,13 +70,14 @@ def get_image_resolution(raster: Union[tarfile.ExFileObject, str]):
 
 def bbox_to_coordinates(bounding_box: list) -> list:
     """
+    Convert a bounding box to a list of polygon coordinates
     """
     polygon_coordinates = [
         (bounding_box[0], bounding_box[1]),  # bottom left
         (bounding_box[0], bounding_box[3]),  # top left
         (bounding_box[2], bounding_box[3]),  # top right
         (bounding_box[2], bounding_box[1]),  # bottom right
-        (bounding_box[0], bounding_box[1])
+        (bounding_box[0], bounding_box[1]),  # back to bottom left
     ]
 
     return polygon_coordinates
@@ -66,22 +85,24 @@ def bbox_to_coordinates(bounding_box: list) -> list:
 
 def bbox_to_polygon(bounding_box: list) -> Polygon:
     """
+    Convert a bounding box to a shapely polygon
     """
     polygon = box(bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3])
 
     return polygon
 
 
-from_4326_transformer = Transformer.from_crs('EPSG:4326', 'EPSG:3857')
-from_3857_transformer = Transformer.from_crs('EPSG:3857', 'EPSG:4326')
+from_4326_transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857")
+from_3857_transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326")
 
 
-def bbox_from_centroid(x: Union[int, float],
-                       y: Union[int, float],
-                       pixel_size: Union[int, float],
-                       width: Union[int, float],
-                       height: Union[int, float]
-                       ) -> list:
+def bbox_from_centroid(
+    x: Union[int, float],
+    y: Union[int, float],
+    pixel_size: Union[int, float],
+    width: Union[int, float],
+    height: Union[int, float],
+) -> list:
     """
     Generate a bounding box from a centroid, pixel size and image dimensions.
 
@@ -122,9 +143,7 @@ def bbox_from_centroid(x: Union[int, float],
     return [min_y, min_x, max_y, max_x]
 
 
-def generate_bounding_box(geom: geometry.point.Point,
-                          differences: list
-                          ) -> list:
+def generate_bounding_box(geom: geometry.point.Point, differences: list) -> list:
     """
     Generate the bounding box of a given point using the difference
     between the maximum and mininum coordinates of the bounding box
@@ -137,12 +156,14 @@ def generate_bounding_box(geom: geometry.point.Point,
     """
     long_diff, lat_diff = differences[0], differences[1]
     lon, lat = geom.x, geom.y
-
-    bbox = (lon - (long_diff / 2),
-            lat - (lat_diff / 2),
-            lon + (long_diff / 2),
-            lat + (lat_diff / 2)
-            )
+
+    bbox = (
+        lon - (long_diff / 2),
+        lat - (lat_diff / 2),
+        lon + (long_diff / 2),
+        lat + (lat_diff / 2),
+    )
+
     # Round the coordinates to 6 decimals
     bounding_box = [round(i, 6) for i in bbox]
 
@@ -151,17 +172,18 @@ def generate_bounding_box(geom: geometry.point.Point,
 
 def calculate_average_coordinates_distance(bounding_box_by_location: dict) -> list:
     """
-    Calculate the mean distance between maximum and minixum longitude
-
+    Calculate the mean distance between maximum and minixum longitude
+    and latitude of the bounding boxes from the existing locations.
+    This is intended to use these mean distance to generate the bounding
     boxes of the new locations given a centroid.
 
-    :param bounding_box_by_location: dictionary with format
-
-
+    :param bounding_box_by_location: dictionary with format
+    location_id : bounding_box for the existing locations in
+    the sen12floods dataset.
+    :return mean_long_diff, mean_lat_diff: mean longitude
+    and latitude difference in the bounding boxes
     """
-
-
-    long_diff_list, lat_diff_list = list(), list()
+    long_diff_list, lat_diff_list = [], []
 
     for bbox in bounding_box_by_location.values():
         long_diff = bbox[2] - bbox[0]
@@ -175,28 +197,36 @@ def calculate_average_coordinates_distance(bounding_box_by_location: dict) -> list:
     return mean_long_diff, mean_lat_diff
 
 
-def generate_new_locations_bounding_boxes(gdf: gpd.GeoDataFrame,
-                                          mean_differences: list,
-                                          latest_id: int
-                                          ) -> dict:
+def generate_new_locations_bounding_boxes(
+    gdf: gpd.GeoDataFrame, mean_differences: list, latest_id: int
+) -> dict:
     """
-    Generate the bounding box of every new location, using
-
-
-
-
-    :param
-
-    :
-
+    Generate the bounding box of every new location, using
+    the mean difference between the maximum and minimum calculated
+    longitude and latitude. This function also returns the time
+    interval which we want to request from Sentinel Hub Services.
+
+    :param gdf: GeoDataFrame wiht the new locations that
+    are going to be added to the dataset
+    :param mean_differences: list with the longitude
+    and latitude mean differences, which are going to be used to generate
+    the bounding boxes.
+    :return: bbox_by_new_location: dict with format {<location_id>:
+    {'bounding_box': list(), 'time_interval': list()}, ... }
+    that contains the bounding box and time interval of the imagery for each location
     """
-    bbox_by_new_location = dict()
+    bbox_by_new_location = {}
 
-    for
+    for _, row in gdf.iterrows():
         new_location_id = str(latest_id + 1)
-        time_interval = row[
-
-
+        time_interval = row["Began"].strftime("%Y-%m-%d"), row["Ended"].strftime(
+            "%Y-%m-%d"
+        )
+        bbox = generate_bounding_box(row["geometry"], mean_differences)
+        bbox_by_new_location[new_location_id] = {
+            "bounding_box": bbox,
+            "time_interval": time_interval,
+        }
         latest_id += 1
 
     return bbox_by_new_location
```
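The geo_utils changes are black-style reformatting plus new docstrings; behavior is unchanged. As a quick orientation, a minimal usage sketch of two of the newly documented helpers (the import path follows the file location; the coordinates are made-up WGS84 values, not package defaults):

```python
# Illustrative sketch only; the bbox values are invented.
from eotdl.tools.geo_utils import is_bounding_box, bbox_to_polygon

bbox = [2.1, 41.3, 2.3, 41.5]  # [min_lon, min_lat, max_lon, max_lat]
if is_bounding_box(bbox):            # type/length checks visible in the diff
    polygon = bbox_to_polygon(bbox)  # shapely Polygon built via shapely.geometry.box
    print(polygon.wkt)
```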
eotdl/tools/metadata.py
CHANGED
```diff
@@ -1,6 +1,6 @@
-'''
+"""
 Metadata utilities for STAC
-'''
+"""
 
 import json
 
@@ -20,25 +20,27 @@ def get_item_metadata(raster_path: str) -> str:
     raster_dir_path = dirname(raster_path)
     # Get the metadata JSON file
     # Check if there is a metadata.json file in the directory
-    if 'metadata.json' in listdir(raster_dir_path):
-        metadata_json = join(raster_dir_path, 'metadata.json')
+    if "metadata.json" in listdir(raster_dir_path):
+        metadata_json = join(raster_dir_path, "metadata.json")
     else:
         # If there is no metadata.json file in the directory, check if there is
         # a json file with the same name as the raster file
         base = splitext(raster_path)[0]
-        metadata_json = base + '.json'
+        metadata_json = base + ".json"
         if not exists(metadata_json):
             # If there is no metadata file in the directory, return None
             return None
-
+
     # Open the metadata file and return it
-    with open(metadata_json, 'r') as f:
+    with open(metadata_json, "r", encoding="utf-8") as f:
         metadata = json.load(f)
-
+
     return metadata
 
 
-def remove_raster_metadata(folder: str, metadata_file: Optional[str] = 'metadata.json') -> None:
+def remove_raster_metadata(
+    folder: str, metadata_file: Optional[str] = "metadata.json"
+) -> None:
     """
     Remove metadata.json file from a folder
 
@@ -48,5 +50,5 @@ def remove_raster_metadata(folder: str, metadata_file: Optional[str] = 'metadata.json') -> None:
     # Search for all the metadata files in the folder
     metadata_files = glob(join(folder, "**", metadata_file), recursive=True)
     # Remove all the metadata files
-    for
-        remove(
+    for metadata in metadata_files:
+        remove(metadata)
```
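get_item_metadata keeps its lookup order across this release: a sibling metadata.json wins, then a JSON file named after the raster, and None when neither exists. A hedged usage sketch (the raster path is a placeholder):

```python
# Hypothetical usage; "data/scene/B04.tif" is a placeholder path.
from eotdl.tools.metadata import get_item_metadata

metadata = get_item_metadata("data/scene/B04.tif")
if metadata is None:
    # neither data/scene/metadata.json nor data/scene/B04.json was found
    print("no metadata available for this raster")
```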
eotdl/tools/paths.py
CHANGED
```diff
@@ -1,6 +1,6 @@
-'''
+"""
 Paths utils
-'''
+"""
 
 from os.path import dirname
 from typing import Union, Optional
@@ -28,24 +28,24 @@ def cut_images(images_list: Union[list, tuple]) -> list:
 
     :return: list of unique directories
     """
-    dirnames = list()
-    images = list()
+    dirnames = []
+    images = []
 
     for image in images_list:
-
-        if
-            dirnames.append(
+        directory = dirname(image)
+        if directory not in dirnames:
+            dirnames.append(directory)
             images.append(image)
 
     return images
 
 
-def get_all_images_in_path(path: str, image_format: Optional[str] = 'tif') -> list:
-
-
+def get_all_images_in_path(path: str, image_format: Optional[str] = "tif") -> list:
+    """
+    Get all the images in a directory
 
-
+    :param path: path to the directory
 
-
-
-
+    :return: list of images
+    """
+    return glob(str(path) + f"/**/*.{image_format}", recursive=True)
```
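In practice the two helpers are chained; a minimal sketch assuming the defaults visible in the diff (the data directory is a placeholder):

```python
# Sketch: image_format defaults to "tif"; cut_images keeps the first image
# found in each distinct directory. The path below is a placeholder.
from eotdl.tools.paths import get_all_images_in_path, cut_images

images = get_all_images_in_path("data/sen12floods")  # recursive glob of **/*.tif
one_per_scene = cut_images(images)                   # one image per directory
```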
eotdl/tools/stac.py
CHANGED
```diff
@@ -2,21 +2,22 @@
 Module for data engineering with STAC elements
 """
 
-import geopandas as gpd
-import pystac
-
 from os.path import dirname, join, abspath
 from os import makedirs
 from json import dumps
 from typing import Union, Optional
-from tqdm import tqdm
-from traceback import print_exc
 from shutil import rmtree
+from traceback import print_exc
+
+import geopandas as gpd
+import pystac
+
+from tqdm import tqdm
 
 
 def stac_items_to_gdf(items: pystac.ItemCollection) -> gpd.GeoDataFrame:
     """
-    Get a GeoDataFrame from a given pystac.ItemCollection.
+    Get a GeoDataFrame from a given pystac.ItemCollection.
 
     :param: items: A pystac.ItemCollection
     :return: GeoDataframe from the given ItemCollection
@@ -29,12 +30,12 @@ def stac_items_to_gdf(items: pystac.ItemCollection) -> gpd.GeoDataFrame:
         if f not in features:
             # Add all the keys in the properties dict as columns in the GeoDataFrame
             for k, v in f.items():
-                if k not in f['properties'] and k != 'geometry':
-                    f['properties'][k] = v
-                if 'scene_id' in f['properties']:
-                    f['properties']['scene_id'] = f['id'].split('_')[3]
-            features.append(f)
-
+                if k not in f["properties"] and k != "geometry":
+                    f["properties"][k] = v
+                if "scene_id" in f["properties"]:
+                    f["properties"]["scene_id"] = f["id"].split("_")[3]
+            features.append(f)
+
     return gpd.GeoDataFrame.from_features(features)
 
 
@@ -65,9 +66,10 @@ def get_all_children(obj: pystac.STACObject) -> list:
     return children
 
 
-def make_links_relative_to_path(path: str,
-                                catalog: Union[pystac.Catalog, str]
-                                ) -> pystac.Catalog:
+def make_links_relative_to_path(
+    path: str,
+    catalog: Union[pystac.Catalog, str],
+) -> pystac.Catalog:
     """
     Makes all asset HREFs and links in the STAC catalog relative to a given path
     """
@@ -76,9 +78,9 @@ def make_links_relative_to_path(path: str,
     path = abspath(path)
 
     # Create a temporary catalog in the destination path to set as root
-    future_path = join(path, 'catalog.json')
+    future_path = join(path, "catalog.json")
     makedirs(path, exist_ok=True)
-    with open(future_path, 'w') as f:
+    with open(future_path, "w", encoding="utf-8") as f:
         f.write(dumps(catalog.to_dict(), indent=4))
     temp_catalog = pystac.Catalog.from_file(future_path)
 
@@ -88,7 +90,7 @@ def make_links_relative_to_path(path: str,
     for collection in catalog.get_children():
         # Create new collection
         new_collection = collection.clone()
-        new_collection.set_self_href(join(path, collection.id, 'collection.json'))
+        new_collection.set_self_href(join(path, collection.id, "collection.json"))
         new_collection.set_root(catalog)
         new_collection.set_parent(catalog)
         # Remove old collection and add new one to catalog
@@ -97,7 +99,9 @@ def make_links_relative_to_path(path: str,
         for item in collection.get_all_items():
             # Create new item from old collection and add it to the new collection
             new_item = item.clone()
-            new_item.set_self_href(join(path, collection.id, item.id, f'{item.id}.json'))
+            new_item.set_self_href(
+                join(path, collection.id, item.id, f"{item.id}.json")
+            )
             new_item.set_parent(collection)
             new_item.set_root(catalog)
             new_item.make_asset_hrefs_relative()
@@ -108,12 +112,13 @@ def make_links_relative_to_path(path: str,
     return catalog
 
 
-def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
-                        catalog_2: Union[pystac.Catalog, str],
-                        destination: Optional[str] = None,
-                        keep_extensions: Optional[bool] = False,
-                        catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED
-                        ) -> None:
+def merge_stac_catalogs(
+    catalog_1: Union[pystac.Catalog, str],
+    catalog_2: Union[pystac.Catalog, str],
+    destination: Optional[str] = None,
+    keep_extensions: Optional[bool] = False,
+    catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED,
+) -> None:
     """
     Merge two STAC catalogs, keeping the properties, collection and items of both catalogs
     """
@@ -122,10 +127,10 @@ def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
     if isinstance(catalog_2, str):
         catalog_2 = pystac.Catalog.from_file(catalog_2)
 
-    for col1 in tqdm(catalog_1.get_children(), desc='Merging catalogs...'):
+    for col1 in tqdm(catalog_1.get_children(), desc="Merging catalogs..."):
         # Check if the collection exists in catalog_2
         col2 = catalog_2.get_child(col1.id)
-        if col2 is None:
+        if not col2:
             # If it does not exist, add it
             col1_ = col1.clone()
             catalog_2.add_child(col1)
@@ -157,10 +162,10 @@ def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
     try:
         print("Validating and saving...")
         catalog_2.validate()
-        rmtree(
-
-
-
+        rmtree(
+            destination
+        ) if not destination else None  # Remove the old catalog and replace it with the new one
+        catalog_2.normalize_and_save(root_href=destination, catalog_type=catalog_type)
         print("Success!")
     except pystac.STACValidationError:
         # Return full callback
```
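The merge entry point only changed shape here, not semantics; a hedged call sketch (both catalog paths and the destination are placeholders):

```python
# Placeholder paths; catalog_1 and catalog_2 may be file paths or pystac.Catalog objects.
import pystac
from eotdl.tools.stac import merge_stac_catalogs

merge_stac_catalogs(
    "catalog_a/catalog.json",
    "catalog_b/catalog.json",
    destination="merged",  # root_href handed to normalize_and_save
    keep_extensions=False,
    catalog_type=pystac.CatalogType.SELF_CONTAINED,
)
```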
eotdl/tools/time_utils.py
CHANGED
```diff
@@ -1,14 +1,22 @@
-
-
+"""
+Time utils
+"""
 
 from datetime import datetime, timedelta
-from typing import Union, Optional
+from typing import Union, Optional
+from dateutil import parser
+
+import geopandas as gpd
+import pandas as pd
 
 
 def is_time_interval(time_interval: list) -> bool:
+    """
+    Check if is time interval and is valid
+    """
     if not isinstance(time_interval, (list, tuple)) or len(time_interval) != 2:
         return False
-
+
     for value in time_interval:
         if not isinstance(value, str):
             return False
@@ -19,6 +27,9 @@ def is_time_interval(time_interval: list) -> bool:
 
 
 def is_valid_date(date_str: str) -> bool:
+    """
+    Check if a date is valid
+    """
     try:
         datetime.strptime(date_str, "%Y-%m-%d")
         return True
@@ -26,24 +37,29 @@ def is_valid_date(date_str: str) -> bool:
         return False
 
 
-def get_first_last_dates(dataframe: Union[pd.DataFrame, gpd.GeoDataFrame], dates_column: Optional[str] = 'datetime'):
+def get_first_last_dates(
+    dataframe: Union[pd.DataFrame, gpd.GeoDataFrame],
+    dates_column: Optional[str] = "datetime",
+):
     """
+    Get first and last dates from a dataframe
     """
     dataframe[dates_column] = dataframe[dates_column].apply(lambda x: sorted(x))
-    dataframe['first_date'] = dataframe['dates_list'].apply(lambda x: x[0])
-    dataframe['last_date'] = dataframe['dates_list'].apply(lambda x: x[-1])
-    dataframe = dataframe.sort_values(by=['first_date', 'last_date'])
+    dataframe["first_date"] = dataframe["dates_list"].apply(lambda x: x[0])
+    dataframe["last_date"] = dataframe["dates_list"].apply(lambda x: x[-1])
+    dataframe = dataframe.sort_values(by=["first_date", "last_date"])
     # Sort by sequence id
-    dataframe = dataframe.sort_values(by=['location_id'])
+    dataframe = dataframe.sort_values(by=["location_id"])
     # Convert first_date and last_date to datetime, in format YYYY-MM-DD
-    dataframe['first_date'] = pd.to_datetime(dataframe['first_date'], format='%Y-%m-%d')
-    dataframe['last_date'] = pd.to_datetime(dataframe['last_date'], format='%Y-%m-%d')
+    dataframe["first_date"] = pd.to_datetime(dataframe["first_date"], format="%Y-%m-%d")
+    dataframe["last_date"] = pd.to_datetime(dataframe["last_date"], format="%Y-%m-%d")
 
     return dataframe
 
 
 def create_time_slots(start_date: datetime, end_date: datetime, n_chunks: int):
     """
+    Create time slots from start date to end date, with n_chunks
     """
     if isinstance(start_date, str):
         start_date = datetime.strptime(start_date, "%Y-%m-%d")
@@ -56,16 +72,19 @@ def create_time_slots(start_date: datetime, end_date: datetime, n_chunks: int):
     return slots
 
 
-def expand_time_interval(time_interval: Union[list, tuple], format: str='%Y-%m-%dT%H:%M:%S.%fZ') -> list:
+def expand_time_interval(
+    time_interval: Union[list, tuple], time_format: str = "%Y-%m-%dT%H:%M:%S.%fZ"
+) -> list:
     """
+    Expand time interval to get more data
     """
     start_date = time_interval[0]
     end_date = time_interval[1]
 
     if isinstance(start_date, str):
-        start_date = datetime.datetime.strptime(start_date, format)
+        start_date = datetime.datetime.strptime(start_date, time_format)
     if isinstance(end_date, str):
-        end_date = datetime.datetime.strptime(end_date, format)
+        end_date = datetime.datetime.strptime(end_date, time_format)
 
     # Add one day to start date and remove one day to end date
     new_start_date = start_date - datetime.timedelta(days=1)
@@ -79,15 +98,22 @@ def expand_time_interval(time_interval: Union[list, tuple], format: str='%Y-%m-%dT%H:%M:%S.%fZ') -> list:
 
 
 def prepare_time_interval(date):
+    """
+    Prepare time interval to request data
+    """
     if isinstance(date, str):
         date = datetime.strptime(date, "%Y-%m-%d")
     elif isinstance(date, tuple):
-        if not is_time_interval(date):
-            raise ValueError('The time interval must be a range of two dates, with format YYYY-MM-DD or a datetime object')
-        else:
+        if is_time_interval(date):
             return date
+        else:
+            raise ValueError(
+                "The time interval must be a range of two dates, with format YYYY-MM-DD or a datetime object"
+            )
     elif not isinstance(date, datetime):
-        raise ValueError('The date must be a string with format YYYY-MM-DD or a datetime object')
+        raise ValueError(
+            "The date must be a string with format YYYY-MM-DD or a datetime object"
+        )
 
     date_day_before = date - timedelta(days=1)
     date_next_day = date + timedelta(days=1)
@@ -98,10 +124,11 @@ def prepare_time_interval(date):
     return (date_day_before_str, date_next_day_str)
 
 
-def get_day_between(from_date: Union[datetime, str],
-                    to_date: Union[datetime, str]
-                    ) -> str:
+def get_day_between(
+    from_date: Union[datetime, str], to_date: Union[datetime, str]
+) -> str:
     """
+    Get the day between two dates
     """
     if isinstance(from_date, str):
         from_date = datetime.strptime(from_date, "%Y-%m-%dT%H:%M:%SZ")
@@ -109,8 +136,8 @@ def get_day_between(from_date: Union[datetime, str],
     to_date = datetime.strptime(to_date, "%Y-%m-%dT%H:%M:%SZ")
 
     date_between = from_date + timedelta(days=1)
-    date_between = date_between.strftime("%Y-%m-%d")
-
+    date_between = date_between.strftime("%Y-%m-%d")
+
     return date_between
 
 
@@ -120,8 +147,8 @@ def format_time_acquired(dt: Union[str, datetime]) -> str:
 
     :param dt: date time to format
     """
-    from dateutil import parser
-
     dt_str = parser.parse(dt).strftime("%Y-%m-%dT%H:%M:%S.%f")
+    # convert to datetime object
+    dt = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f")
 
-    return dt_str
+    return dt
```
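Per the diff, prepare_time_interval now validates a tuple before deciding whether to raise: a single date is widened to the surrounding two-day window, while a valid two-date tuple passes through unchanged. A sketch (example dates are arbitrary, and the returned strings are assumed to use the %Y-%m-%d format seen throughout the module):

```python
# Example dates are arbitrary; output format assumed to be %Y-%m-%d.
from eotdl.tools.time_utils import prepare_time_interval

print(prepare_time_interval("2020-03-15"))
# expected: ("2020-03-14", "2020-03-16")
print(prepare_time_interval(("2020-03-01", "2020-03-10")))
# a valid interval is returned as-is
```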