eotdl 2023.7.19.post4__py3-none-any.whl → 2023.9.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eotdl/commands/datasets.py +15 -29
- eotdl/curation/__init__.py +5 -5
- eotdl/curation/formatters.py +0 -2
- eotdl/curation/metadata.py +34 -9
- eotdl/curation/stac/assets.py +127 -0
- eotdl/curation/stac/dataframe.py +8 -4
- eotdl/curation/stac/extensions.py +295 -46
- eotdl/curation/stac/extent.py +130 -0
- eotdl/curation/stac/ml_dataset.py +509 -0
- eotdl/curation/stac/parsers.py +2 -0
- eotdl/curation/stac/stac.py +309 -286
- eotdl/curation/stac/utils.py +47 -1
- eotdl/datasets/__init__.py +2 -2
- eotdl/datasets/download.py +16 -3
- eotdl/datasets/ingest.py +21 -10
- eotdl/datasets/retrieve.py +10 -2
- eotdl/src/repos/APIRepo.py +42 -18
- eotdl/src/repos/AuthRepo.py +3 -3
- eotdl/src/usecases/auth/IsLogged.py +5 -3
- eotdl/src/usecases/datasets/DownloadDataset.py +35 -6
- eotdl/src/usecases/datasets/DownloadFileURL.py +22 -0
- eotdl/src/usecases/datasets/IngestFile.py +48 -28
- eotdl/src/usecases/datasets/IngestSTAC.py +43 -8
- eotdl/src/usecases/datasets/RetrieveDatasets.py +3 -2
- eotdl/src/usecases/datasets/__init__.py +1 -0
- eotdl/tools/sen12floods/tools.py +3 -3
- eotdl/tools/stac.py +8 -2
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.dist-info}/METADATA +2 -1
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.dist-info}/RECORD +31 -27
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.dist-info}/WHEEL +1 -1
- {eotdl-2023.7.19.post4.dist-info → eotdl-2023.9.14.dist-info}/entry_points.txt +0 -0
eotdl/curation/stac/stac.py
CHANGED
@@ -2,67 +2,82 @@
|
|
2
2
|
Module for generating STAC metadata
|
3
3
|
"""
|
4
4
|
|
5
|
+
from typing import Union
|
5
6
|
import pandas as pd
|
6
|
-
import json
|
7
7
|
import pystac
|
8
|
-
from
|
8
|
+
from tqdm import tqdm
|
9
9
|
|
10
|
-
from os import
|
11
|
-
from
|
10
|
+
from os.path import join, basename, dirname
|
11
|
+
from shutil import rmtree
|
12
12
|
|
13
13
|
import rasterio
|
14
14
|
from rasterio.warp import transform_bounds
|
15
|
+
from typing import Union, List
|
15
16
|
|
16
17
|
from datetime import datetime
|
17
18
|
from shapely.geometry import Polygon, mapping
|
18
19
|
from glob import glob
|
19
|
-
|
20
|
-
from stac_validator.stac_validator import StacValidate
|
20
|
+
from typing import Union, Optional
|
21
21
|
|
22
22
|
from .parsers import STACIdParser, StructuredParser
|
23
|
-
from .
|
24
|
-
from .
|
23
|
+
from .assets import STACAssetGenerator
|
24
|
+
from .utils import (format_time_acquired,
|
25
|
+
cut_images,
|
26
|
+
get_item_metadata)
|
27
|
+
from .extensions import (type_stac_extensions_dict,
|
28
|
+
SUPPORTED_EXTENSIONS,
|
29
|
+
LabelExtensionObject)
|
30
|
+
from .extent import (get_unknow_extent,
|
31
|
+
get_collection_extent)
|
25
32
|
|
26
33
|
|
27
34
|
class STACGenerator:
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
35
|
+
def __init__(
|
36
|
+
self,
|
37
|
+
image_format: str = "tiff",
|
38
|
+
catalog_type: pystac.CatalogType = pystac.CatalogType.SELF_CONTAINED,
|
39
|
+
item_parser: STACIdParser = StructuredParser,
|
40
|
+
assets_generator: STACAssetGenerator = STACAssetGenerator,
|
41
|
+
) -> None:
|
34
42
|
"""
|
35
43
|
Initialize the STAC generator
|
36
|
-
|
44
|
+
|
37
45
|
:param image_format: image format of the assets
|
38
46
|
:param catalog_type: type of the catalog
|
39
47
|
:param item_parser: parser to get the item ID
|
48
|
+
:param assets_generator: generator to generate the assets
|
40
49
|
"""
|
41
50
|
self._image_format = image_format
|
42
51
|
self._catalog_type = catalog_type
|
43
52
|
self._item_parser = item_parser()
|
53
|
+
self._assets_generator = assets_generator()
|
44
54
|
self._extensions_dict: dict = type_stac_extensions_dict
|
45
|
-
self.
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
55
|
+
self._stac_dataframe = pd.DataFrame()
|
56
|
+
|
57
|
+
def generate_stac_metadata(
|
58
|
+
self,
|
59
|
+
id: str,
|
60
|
+
description: str,
|
61
|
+
stac_dataframe: pd.DataFrame = None,
|
62
|
+
output_folder: str = "stac",
|
63
|
+
kwargs: dict = {},
|
64
|
+
) -> None:
|
54
65
|
"""
|
55
66
|
Generate STAC metadata for a given directory containing the assets to generate metadata
|
56
67
|
|
57
|
-
:param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
|
58
68
|
:param id: id of the catalog
|
59
69
|
:param description: description of the catalog
|
70
|
+
:param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
|
60
71
|
:param output_folder: output folder to write the catalog to
|
61
72
|
"""
|
62
|
-
self._stac_dataframe =
|
63
|
-
|
73
|
+
self._stac_dataframe = (
|
74
|
+
stac_dataframe if self._stac_dataframe.empty else self._stac_dataframe
|
75
|
+
)
|
76
|
+
if self._stac_dataframe.empty:
|
77
|
+
raise ValueError("No STAC dataframe provided")
|
78
|
+
|
64
79
|
# Create an empty catalog
|
65
|
-
catalog =
|
80
|
+
catalog = pystac.Catalog(id=id, description=description, **kwargs)
|
66
81
|
|
67
82
|
# Add the collections to the catalog
|
68
83
|
collections = self._stac_dataframe.collection.unique()
|
@@ -72,69 +87,63 @@ class STACGenerator:
|
|
72
87
|
collection = self.generate_stac_collection(collection_path)
|
73
88
|
# Add the collection to the catalog
|
74
89
|
catalog.add_child(collection)
|
75
|
-
|
90
|
+
|
76
91
|
# Add the catalog to the root directory
|
77
92
|
catalog.normalize_hrefs(output_folder)
|
78
93
|
|
79
94
|
# Validate the catalog
|
95
|
+
print("Validating and saving catalog...")
|
80
96
|
try:
|
81
97
|
pystac.validation.validate(catalog)
|
82
98
|
catalog.save(catalog_type=self._catalog_type)
|
99
|
+
print("Success!")
|
83
100
|
except pystac.STACValidationError as e:
|
84
|
-
print(f
|
101
|
+
print(f"Catalog validation error: {e}")
|
85
102
|
return
|
86
103
|
|
87
|
-
def get_stac_dataframe(self,
|
104
|
+
def get_stac_dataframe(self,
|
105
|
+
path: str,
|
106
|
+
collections: Union[str, dict]='source',
|
107
|
+
bands: dict=None,
|
108
|
+
extensions: dict=None
|
109
|
+
) -> pd.DataFrame:
|
88
110
|
"""
|
89
111
|
Get a dataframe with the STAC metadata of a given directory containing the assets to generate metadata
|
90
112
|
|
91
113
|
:param path: path to the root directory
|
114
|
+
:param collections: dictionary with the collections
|
115
|
+
:param bands: dictionary with the bands
|
92
116
|
:param extensions: dictionary with the extensions
|
93
|
-
:param image_format: image format of the assets
|
94
117
|
"""
|
95
118
|
images = glob(str(path) + f'/**/*.{self._image_format}', recursive=True)
|
96
|
-
|
119
|
+
if self._assets_generator.type == 'Extracted':
|
120
|
+
images = cut_images(images)
|
121
|
+
|
97
122
|
labels, ixs = self._format_labels(images)
|
98
|
-
|
99
|
-
|
100
|
-
|
123
|
+
bands_values = self._get_items_list_from_dict(labels, bands)
|
124
|
+
extensions_values = self._get_items_list_from_dict(labels, extensions)
|
125
|
+
|
126
|
+
if collections == "source":
|
127
|
+
# List of path with the same value repeated as many times as the number of images
|
128
|
+
collections_values = [join(path, "source") for i in range(len(images))]
|
129
|
+
else:
|
130
|
+
try:
|
131
|
+
collections_values = [join(path, value) for value in self._get_items_list_from_dict(labels, collections)]
|
132
|
+
except TypeError as e:
|
133
|
+
# TODO control this error
|
134
|
+
raise TypeError(f'Control this error')
|
101
135
|
|
102
136
|
df = pd.DataFrame({'image': images,
|
103
137
|
'label': labels,
|
104
138
|
'ix': ixs,
|
105
|
-
'collection':
|
106
|
-
'extensions':
|
107
|
-
'bands':
|
139
|
+
'collection': collections_values,
|
140
|
+
'extensions': extensions_values,
|
141
|
+
'bands': bands_values
|
142
|
+
})
|
108
143
|
|
109
|
-
|
110
|
-
|
111
|
-
def _get_images_common_prefix(self, images: list) -> list:
|
112
|
-
"""
|
113
|
-
Get the common prefix of a list of images
|
114
|
-
|
115
|
-
:param images: list of images
|
116
|
-
"""
|
117
|
-
images_common_prefix_dict = dict()
|
118
|
-
|
119
|
-
images_dirs = [dirname(i) for i in images]
|
120
|
-
|
121
|
-
for image in images_dirs:
|
122
|
-
path = image
|
123
|
-
common = False
|
124
|
-
while not common:
|
125
|
-
n = count_ocurrences(path, images_dirs)
|
126
|
-
if n > 1:
|
127
|
-
images_common_prefix_dict[image] = path
|
128
|
-
common = True
|
129
|
-
else:
|
130
|
-
path = dirname(path)
|
131
|
-
|
132
|
-
images_common_prefix_list = list()
|
133
|
-
for i in images:
|
134
|
-
images_common_prefix_list.append(images_common_prefix_dict[dirname(i)])
|
144
|
+
self._stac_dataframe = df
|
135
145
|
|
136
|
-
return
|
137
|
-
|
146
|
+
return df
|
138
147
|
|
139
148
|
def _format_labels(self, images):
|
140
149
|
"""
|
@@ -142,10 +151,10 @@ class STACGenerator:
|
|
142
151
|
|
143
152
|
:param images: list of images
|
144
153
|
"""
|
145
|
-
labels = [x.split(
|
154
|
+
labels = [x.split("/")[-1].split("_")[0].split(".")[0] for x in images]
|
146
155
|
ixs = [labels.index(x) for x in labels]
|
147
156
|
return labels, ixs
|
148
|
-
|
157
|
+
|
149
158
|
def _get_items_list_from_dict(self, labels: list, items: dict) -> list:
|
150
159
|
"""
|
151
160
|
Get a list of items from a dictionary
|
@@ -164,160 +173,52 @@ class STACGenerator:
|
|
164
173
|
items_list.append(None)
|
165
174
|
|
166
175
|
return items_list
|
167
|
-
|
168
|
-
def _get_collection_extent(self, path: str) -> pystac.Extent:
|
169
|
-
"""
|
170
|
-
Get the extent of a collection
|
171
|
-
|
172
|
-
:param path: path to the directory
|
173
|
-
"""
|
174
|
-
# Get the spatial extent of the collection
|
175
|
-
spatial_extent = self._get_collection_spatial_extent(path)
|
176
|
-
# Get the temporal interval of the collection
|
177
|
-
temporal_interval = self._get_collection_temporal_interval(path)
|
178
|
-
# Create the Extent object
|
179
|
-
extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_interval)
|
180
|
-
|
181
|
-
return extent
|
182
|
-
|
183
|
-
def _get_collection_spatial_extent(self, path: str) -> pystac.SpatialExtent:
|
184
|
-
"""
|
185
|
-
Get the spatial extent of a collection
|
186
|
-
|
187
|
-
:param path: path to the directory
|
188
|
-
"""
|
189
|
-
# Get the bounding boxes of all the rasters in the path
|
190
|
-
bboxes = list()
|
191
|
-
# use glob
|
192
|
-
rasters = glob(f'{path}/**/*.{self._image_format}', recursive=True)
|
193
|
-
for raster in rasters:
|
194
|
-
with rasterio.open(raster) as ds:
|
195
|
-
bounds = ds.bounds
|
196
|
-
dst_crs = 'EPSG:4326'
|
197
|
-
try:
|
198
|
-
left, bottom, right, top = rasterio.warp.transform_bounds(ds.crs, dst_crs, *bounds)
|
199
|
-
bbox = [left, bottom, right, top]
|
200
|
-
except rasterio.errors.CRSError:
|
201
|
-
spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
|
202
|
-
return spatial_extent
|
203
|
-
bboxes.append(bbox)
|
204
|
-
# Get the minimum and maximum values of the bounding boxes
|
205
|
-
try:
|
206
|
-
left = min([bbox[0] for bbox in bboxes])
|
207
|
-
bottom = min([bbox[1] for bbox in bboxes])
|
208
|
-
right = max([bbox[2] for bbox in bboxes])
|
209
|
-
top = max([bbox[3] for bbox in bboxes])
|
210
|
-
spatial_extent = pystac.SpatialExtent([[left, bottom, right, top]])
|
211
|
-
except ValueError:
|
212
|
-
spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
|
213
|
-
finally:
|
214
|
-
return spatial_extent
|
215
|
-
|
216
|
-
def _get_collection_temporal_interval(self, path: str) -> pystac.TemporalExtent:
|
217
|
-
"""
|
218
|
-
Get the temporal interval of a collection
|
219
176
|
|
220
|
-
|
221
|
-
"""
|
222
|
-
# Get all the metadata.json files in the path
|
223
|
-
metadata_json_files = glob(f'{path}/**/*.json', recursive=True)
|
224
|
-
if not metadata_json_files:
|
225
|
-
return self._get_unknow_temporal_interval()
|
226
|
-
|
227
|
-
# Get the temporal interval of every metadata.json file
|
228
|
-
temporal_intervals = list()
|
229
|
-
for metadata_json_file in metadata_json_files:
|
230
|
-
with open(metadata_json_file, 'r') as f:
|
231
|
-
metadata = json.load(f)
|
232
|
-
temporal_intervals.append(metadata['date-adquired']) if metadata['date-adquired'] else None
|
233
|
-
if temporal_intervals: # TODO control in DEM data
|
234
|
-
try:
|
235
|
-
# Get the minimum and maximum values of the temporal intervals
|
236
|
-
min_date = min([datetime.strptime(interval, '%Y-%m-%d') for interval in temporal_intervals])
|
237
|
-
max_date = max([datetime.strptime(interval, '%Y-%m-%d') for interval in temporal_intervals])
|
238
|
-
except ValueError:
|
239
|
-
min_date = datetime.strptime('2000-01-01', '%Y-%m-%d')
|
240
|
-
max_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
|
241
|
-
finally:
|
242
|
-
# Create the temporal interval
|
243
|
-
temporal_interval = pystac.TemporalExtent([min_date, max_date])
|
244
|
-
else:
|
245
|
-
return self._get_unknow_temporal_interval()
|
246
|
-
|
247
|
-
return temporal_interval
|
248
|
-
|
249
|
-
def _get_unknow_temporal_interval(self) -> pystac.TemporalExtent:
|
250
|
-
"""
|
251
|
-
Get an unknown temporal interval
|
252
|
-
"""
|
253
|
-
min_date = datetime.strptime('2000-01-01', '%Y-%m-%d')
|
254
|
-
max_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
|
255
|
-
|
256
|
-
return pystac.TemporalExtent([min_date, max_date])
|
257
|
-
|
258
|
-
def create_stac_catalog(self, id: str, description: str, kwargs: dict={}) -> pystac.Catalog:
|
259
|
-
"""
|
260
|
-
Create a STAC catalog
|
261
|
-
|
262
|
-
:param id: id of the catalog
|
263
|
-
:param description: description of the catalog
|
264
|
-
:param params: additional parameters
|
265
|
-
"""
|
266
|
-
return pystac.Catalog(id=id, description=description, **kwargs)
|
267
|
-
|
268
|
-
def generate_stac_collection(self, path: str) -> pystac.Collection:
|
177
|
+
def generate_stac_collection(self, collection_path: str) -> pystac.Collection:
|
269
178
|
"""
|
270
179
|
Generate a STAC collection from a directory containing the assets to generate metadata
|
271
180
|
|
272
|
-
:param
|
181
|
+
:param collection_path: path to the collection
|
273
182
|
"""
|
183
|
+
# Get the images of the collection, as they are needed to obtain the collection extent
|
184
|
+
collection_images = self._stac_dataframe[
|
185
|
+
self._stac_dataframe["collection"] == collection_path
|
186
|
+
]["image"]
|
274
187
|
# Get the collection extent
|
275
|
-
extent =
|
188
|
+
extent = get_collection_extent(collection_images)
|
276
189
|
# Create the collection
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
190
|
+
collection_id = basename(collection_path)
|
191
|
+
collection = pystac.Collection(
|
192
|
+
id=collection_id, description="Collection", extent=extent
|
193
|
+
)
|
194
|
+
|
195
|
+
print(f"Generating {collection_id} collection...")
|
196
|
+
for image in tqdm(collection_images):
|
197
|
+
# Create the item
|
198
|
+
item = self.create_stac_item(image)
|
199
|
+
# Add the item to the collection
|
200
|
+
collection.add_item(item)
|
201
|
+
|
289
202
|
# Return the collection
|
290
203
|
return collection
|
291
204
|
|
292
|
-
def
|
293
|
-
"""
|
294
|
-
Create a STAC collection
|
295
|
-
|
296
|
-
:param id: id of the collection
|
297
|
-
:param description: description of the collection
|
298
|
-
:param extent: extent of the collection
|
299
|
-
:param params: additional parameters
|
300
|
-
"""
|
301
|
-
return pystac.Collection(id=id, description=description, extent=extent, **kwargs)
|
302
|
-
|
303
|
-
def create_stac_item(self,
|
304
|
-
raster_path: str,
|
305
|
-
kwargs: dict={}
|
306
|
-
) -> pystac.Item:
|
205
|
+
def create_stac_item(self, raster_path: str, kwargs: dict = {}) -> pystac.Item:
|
307
206
|
"""
|
308
207
|
Create a STAC item from a directory containing the raster files and the metadata.json file
|
309
208
|
|
310
209
|
:param raster_path: path to the raster file
|
311
210
|
"""
|
312
211
|
# Check if there is any metadata file in the directory associated to the raster file
|
313
|
-
metadata =
|
212
|
+
metadata = get_item_metadata(raster_path)
|
314
213
|
|
315
214
|
# Obtain the bounding box from the raster
|
316
215
|
with rasterio.open(raster_path) as ds:
|
317
216
|
bounds = ds.bounds
|
318
|
-
dst_crs =
|
217
|
+
dst_crs = "EPSG:4326"
|
319
218
|
try:
|
320
|
-
left, bottom, right, top = rasterio.warp.transform_bounds(
|
219
|
+
left, bottom, right, top = rasterio.warp.transform_bounds(
|
220
|
+
ds.crs, dst_crs, *bounds
|
221
|
+
)
|
321
222
|
except rasterio.errors.CRSError:
|
322
223
|
# If the raster has no crs, set the bounding box to 0
|
323
224
|
left, bottom, right, top = 0, 0, 0, 0
|
@@ -327,116 +228,238 @@ class STACGenerator:
|
|
327
228
|
|
328
229
|
# Create geojson feature
|
329
230
|
# If the bounding box has no values, set the geometry to None
|
330
|
-
geom = mapping(
|
331
|
-
|
332
|
-
|
333
|
-
[right, top],
|
334
|
-
[right, bottom]
|
335
|
-
]))
|
231
|
+
geom = mapping(
|
232
|
+
Polygon([[left, bottom], [left, top], [right, top], [right, bottom]])
|
233
|
+
)
|
336
234
|
|
337
|
-
# Initialize
|
338
|
-
|
235
|
+
# Initialize pySTAC item parameters
|
236
|
+
params = dict()
|
237
|
+
params["properties"] = dict()
|
339
238
|
|
340
239
|
# Obtain the date acquired
|
341
|
-
|
240
|
+
start_time, end_time = None, None
|
241
|
+
if metadata and metadata["date-adquired"] and metadata["type"] not in ('dem', 'DEM'):
|
342
242
|
time_acquired = format_time_acquired(metadata["date-adquired"])
|
343
243
|
else:
|
344
|
-
#
|
345
|
-
|
346
|
-
|
244
|
+
# Check if the type of the data is DEM
|
245
|
+
if metadata and metadata["type"] and metadata["type"] in ("dem", "DEM"):
|
246
|
+
time_acquired = None
|
247
|
+
start_time = datetime.strptime("2011-01-01", "%Y-%m-%d")
|
248
|
+
end_time = datetime.strptime("2015-01-07", "%Y-%m-%d")
|
249
|
+
params["start_datetime"] = start_time
|
250
|
+
params["end_datetime"] = end_time
|
251
|
+
else:
|
252
|
+
# Set unknown date
|
253
|
+
time_acquired = datetime.strptime("2000-01-01", "%Y-%m-%d")
|
254
|
+
|
347
255
|
# Obtain the item ID. The approach depends on the item parser
|
348
256
|
id = self._item_parser.get_item_id(raster_path)
|
257
|
+
# Add the item ID to the dataframe, to be able to get it later
|
258
|
+
self._stac_dataframe.loc[
|
259
|
+
self._stac_dataframe["image"] == raster_path, "id"
|
260
|
+
] = id
|
349
261
|
|
350
262
|
# Instantiate pystac item
|
351
|
-
item = pystac.Item(
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
extensions = self._stac_dataframe[self._stac_dataframe['image'] == raster_path]['extensions'].values
|
263
|
+
item = pystac.Item(
|
264
|
+
id=id, geometry=geom, bbox=bbox, datetime=time_acquired, **params
|
265
|
+
)
|
266
|
+
|
267
|
+
# Get the item info, from the raster path
|
268
|
+
item_info = self._stac_dataframe[self._stac_dataframe["image"] == raster_path]
|
269
|
+
# Get the extensions of the item
|
270
|
+
extensions = item_info["extensions"].values
|
360
271
|
extensions = extensions[0] if extensions else None
|
272
|
+
|
361
273
|
# Add the required extensions to the item
|
362
274
|
if extensions:
|
363
275
|
if isinstance(extensions, str):
|
364
276
|
extensions = [extensions]
|
365
277
|
for extension in extensions:
|
366
|
-
|
367
|
-
|
278
|
+
if extension not in SUPPORTED_EXTENSIONS:
|
279
|
+
raise ValueError(f"Extension {extension} not supported")
|
280
|
+
else:
|
281
|
+
extension_obj = self._extensions_dict[extension]
|
282
|
+
extension_obj.add_extension_to_object(item, item_info)
|
368
283
|
|
369
284
|
# Add the assets to the item
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
with rasterio.open(raster_path, 'r') as raster:
|
381
|
-
# Get the name of the raster file without extension
|
382
|
-
raster_name = basename(raster_path).split('.')[0]
|
383
|
-
if isinstance(bands, str):
|
384
|
-
bands = [bands]
|
385
|
-
for band in bands:
|
386
|
-
i = bands.index(band)
|
387
|
-
try:
|
388
|
-
single_band = raster.read(i + 1)
|
389
|
-
except IndexError:
|
390
|
-
# TODO put try here for IndexError: band index 2 out of range (not in (1,))
|
391
|
-
# TODO control
|
392
|
-
single_band = raster.read(1)
|
393
|
-
band_name = f'{raster_name}_{band}.{self._image_format}'
|
394
|
-
output_band = join(dirname(raster_path), band_name)
|
395
|
-
# Copy the metadata
|
396
|
-
metadata = raster.meta.copy()
|
397
|
-
metadata.update({"count": 1})
|
398
|
-
# Write the band to the output folder
|
399
|
-
with rasterio.open(output_band, "w", **metadata) as dest:
|
400
|
-
dest.write(single_band, 1)
|
401
|
-
# Instantiate pystac asset
|
402
|
-
asset = pystac.Asset(href=band_name, title=band, media_type=pystac.MediaType.GEOTIFF)
|
403
|
-
# Add the asset to the item
|
404
|
-
item.add_asset(band_name, asset)
|
285
|
+
assets = self._assets_generator.extract_assets(item_info)
|
286
|
+
if not assets:
|
287
|
+
# If there are not assets using the selected generator, try with the default
|
288
|
+
assets = STACAssetGenerator.extract_assets(item_info)
|
289
|
+
|
290
|
+
# Add the assets to the item
|
291
|
+
if assets:
|
292
|
+
for asset in assets:
|
293
|
+
if isinstance(asset, pystac.Asset):
|
294
|
+
item.add_asset(asset.title, asset)
|
405
295
|
# Add the required extensions to the asset if required
|
406
296
|
if extensions:
|
407
297
|
if isinstance(extensions, str):
|
408
298
|
extensions = [extensions]
|
409
299
|
for extension in extensions:
|
410
|
-
|
411
|
-
|
300
|
+
if extension not in SUPPORTED_EXTENSIONS:
|
301
|
+
raise ValueError(f"Extension {extension} not supported")
|
302
|
+
else:
|
303
|
+
extension_obj = self._extensions_dict[extension]
|
304
|
+
extension_obj.add_extension_to_object(asset, item_info)
|
305
|
+
|
306
|
+
item.set_self_href(join(dirname(raster_path), f"{id}.json"))
|
307
|
+
item.make_asset_hrefs_relative()
|
412
308
|
|
413
|
-
|
414
309
|
return item
|
415
310
|
|
416
|
-
def
|
311
|
+
def generate_stac_labels(
|
312
|
+
self,
|
313
|
+
catalog: Union[pystac.Catalog, str],
|
314
|
+
stac_dataframe: Optional[pd.DataFrame] = None,
|
315
|
+
collection: Optional[Union[pystac.Collection, str]] = None,
|
316
|
+
) -> None:
|
417
317
|
"""
|
418
|
-
|
419
|
-
|
420
|
-
:param
|
318
|
+
Generate a labels collection from a STAC dataframe
|
319
|
+
|
320
|
+
:param catalog: catalog to add the labels collection to
|
321
|
+
:param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
|
322
|
+
:param collection: collection to add the labels collection to
|
421
323
|
"""
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
324
|
+
self._stac_dataframe = (
|
325
|
+
stac_dataframe if self._stac_dataframe.empty else self._stac_dataframe
|
326
|
+
)
|
327
|
+
if self._stac_dataframe.empty:
|
328
|
+
raise ValueError(
|
329
|
+
"No STAC dataframe provided, please provide a STAC dataframe or generate it with <get_stac_dataframe> method"
|
330
|
+
)
|
331
|
+
if isinstance(catalog, str):
|
332
|
+
catalog = pystac.Catalog.from_file(catalog)
|
333
|
+
|
334
|
+
# Add the labels collection to the catalog
|
335
|
+
# If exists a source collection, get it extent
|
336
|
+
source_collection = catalog.get_child("source")
|
337
|
+
if source_collection:
|
338
|
+
extent = source_collection.extent
|
339
|
+
source_items = source_collection.get_all_items()
|
428
340
|
else:
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
341
|
+
if not collection:
|
342
|
+
raise ValueError(
|
343
|
+
"No source collection provided, please provide a source collection"
|
344
|
+
)
|
345
|
+
extent = get_unknow_extent()
|
346
|
+
|
347
|
+
# Create the labels collection and add it to the catalog if it does not exist
|
348
|
+
# If it exists, remove it
|
349
|
+
collection = pystac.Collection(id="labels", description="Labels", extent=extent)
|
350
|
+
if collection.id in [c.id for c in catalog.get_children()]:
|
351
|
+
catalog.remove_child(collection.id)
|
352
|
+
catalog.add_child(collection)
|
353
|
+
|
354
|
+
# Generate the labels items
|
355
|
+
print("Generating labels collection...")
|
356
|
+
for source_item in tqdm(source_items):
|
357
|
+
# There must be an item ID column in the STAC dataframe
|
358
|
+
if not 'id' in self._stac_dataframe.columns:
|
359
|
+
raise ValueError(
|
360
|
+
"No item ID column found in the STAC dataframe, please provide a STAC dataframe with the item ID column"
|
361
|
+
)
|
362
|
+
label_classes = self._stac_dataframe.label.unique().tolist()
|
363
|
+
|
364
|
+
# Create the label item
|
365
|
+
# TODO put in kwargs
|
366
|
+
label_item = LabelExtensionObject.add_extension_to_item(
|
367
|
+
source_item,
|
368
|
+
label_names=["label"],
|
369
|
+
label_classes=[label_classes],
|
370
|
+
label_properties=["label"],
|
371
|
+
label_description="Item label",
|
372
|
+
label_methods=["manual"],
|
373
|
+
label_tasks=["classification"],
|
374
|
+
label_type="vector"
|
375
|
+
)
|
376
|
+
# Add the self href to the label item, following the Best Practices Layout
|
377
|
+
# https://github.com/radiantearth/stac-spec/blob/master/best-practices.md
|
378
|
+
label_item.set_self_href(
|
379
|
+
join(
|
380
|
+
dirname(collection.get_self_href()),
|
381
|
+
label_item.id,
|
382
|
+
f"{label_item.id}.json"
|
383
|
+
)
|
384
|
+
)
|
385
|
+
collection.add_item(label_item)
|
386
|
+
|
387
|
+
# Add the extension to the collection
|
388
|
+
# TODO put in kwargs
|
389
|
+
LabelExtensionObject.add_extension_to_collection(
|
390
|
+
collection,
|
391
|
+
label_names=["label"],
|
392
|
+
label_classes=[label_classes],
|
393
|
+
label_type="vector",
|
394
|
+
)
|
395
|
+
|
396
|
+
# Validate and save the catalog
|
397
|
+
# Before adding the geojson, we need to save the catalog
|
398
|
+
# and then iterate over the items to add the geojson
|
399
|
+
try:
|
400
|
+
pystac.validation.validate(catalog)
|
401
|
+
catalog.normalize_and_save(dirname(catalog.get_self_href()), self._catalog_type)
|
402
|
+
except pystac.STACValidationError as e:
|
403
|
+
print(f"Catalog validation error: {e}")
|
404
|
+
return
|
441
405
|
|
442
|
-
|
406
|
+
# Add a GeoJSON FeatureCollection to every label item, as recommended by the spec
|
407
|
+
# https://github.com/stac-extensions/label#assets
|
408
|
+
LabelExtensionObject.add_geojson_to_items(collection,
|
409
|
+
self._stac_dataframe)
|
410
|
+
catalog.normalize_and_save(dirname(catalog.get_self_href()), self._catalog_type)
|
411
|
+
|
412
|
+
|
413
|
+
def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
|
414
|
+
catalog_2: Union[pystac.Catalog, str],
|
415
|
+
destination: Optional[str] = None,
|
416
|
+
keep_extensions: Optional[bool] = False,
|
417
|
+
catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED
|
418
|
+
) -> None:
|
419
|
+
"""
|
420
|
+
Merge two STAC catalogs, keeping the properties, collection and items of both catalogs
|
421
|
+
|
422
|
+
:param catalog_1: first catalog to merge
|
423
|
+
:param catalog_2: second catalog to merge
|
424
|
+
:param destination: destination folder to save the merged catalog
|
425
|
+
:param keep_extensions: keep the extensions of the first catalog
|
426
|
+
:param catalog_type: type of the catalog
|
427
|
+
"""
|
428
|
+
if isinstance(catalog_1, str):
|
429
|
+
catalog_1 = pystac.Catalog.from_file(catalog_1)
|
430
|
+
if isinstance(catalog_2, str):
|
431
|
+
catalog_2 = pystac.Catalog.from_file(catalog_2)
|
432
|
+
|
433
|
+
for col1 in tqdm(catalog_1.get_children(), desc='Merging catalogs...'):
|
434
|
+
# Check if the collection exists in catalog_2
|
435
|
+
col2 = catalog_2.get_child(col1.id)
|
436
|
+
if col2 is None:
|
437
|
+
# If it does not exist, add it
|
438
|
+
col1_ = col1.clone()
|
439
|
+
catalog_2.add_child(col1)
|
440
|
+
col2 = catalog_2.get_child(col1.id)
|
441
|
+
col2.clear_items()
|
442
|
+
for i in col1_.get_all_items():
|
443
|
+
col2.add_item(i)
|
444
|
+
else:
|
445
|
+
# If it exists, merge the items
|
446
|
+
for item1 in col1.get_items():
|
447
|
+
if col2.get_item(item1.id) is None:
|
448
|
+
col2.add_item(item1)
|
449
|
+
|
450
|
+
if keep_extensions:
|
451
|
+
for ext in catalog_1.stac_extensions:
|
452
|
+
if ext not in catalog_2.stac_extensions:
|
453
|
+
catalog_2.stac_extensions.append(ext)
|
454
|
+
|
455
|
+
for extra_field_name, extra_field_value in catalog_1.extra_fields.items():
|
456
|
+
if extra_field_name not in catalog_2.extra_fields:
|
457
|
+
catalog_2.extra_fields[extra_field_name] = extra_field_value
|
458
|
+
|
459
|
+
if not destination:
|
460
|
+
destination = dirname(catalog_2.get_self_href())
|
461
|
+
rmtree(destination) # Remove the old catalog and replace it with the new one
|
462
|
+
# Save the merged catalog
|
463
|
+
print('Validating...')
|
464
|
+
catalog_2.normalize_and_save(destination, catalog_type)
|
465
|
+
print('Success')
|