eotdl 2023.7.19.post4__py3-none-any.whl → 2023.9.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,67 +2,82 @@
2
2
  Module for generating STAC metadata
3
3
  """
4
4
 
5
+ from typing import Union
5
6
  import pandas as pd
6
- import json
7
7
  import pystac
8
- from random import sample
8
+ from tqdm import tqdm
9
9
 
10
- from os import listdir
11
- from os.path import join, basename, exists, dirname
10
+ from os.path import join, basename, dirname
11
+ from shutil import rmtree
12
12
 
13
13
  import rasterio
14
14
  from rasterio.warp import transform_bounds
15
+ from typing import Union, List
15
16
 
16
17
  from datetime import datetime
17
18
  from shapely.geometry import Polygon, mapping
18
19
  from glob import glob
19
-
20
- from stac_validator.stac_validator import StacValidate
20
+ from typing import Union, Optional
21
21
 
22
22
  from .parsers import STACIdParser, StructuredParser
23
- from .utils import format_time_acquired, count_ocurrences
24
- from .extensions import type_stac_extensions_dict
23
+ from .assets import STACAssetGenerator
24
+ from .utils import (format_time_acquired,
25
+ cut_images,
26
+ get_item_metadata)
27
+ from .extensions import (type_stac_extensions_dict,
28
+ SUPPORTED_EXTENSIONS,
29
+ LabelExtensionObject)
30
+ from .extent import (get_unknow_extent,
31
+ get_collection_extent)
25
32
 
26
33
 
27
34
  class STACGenerator:
28
-
29
- def __init__(self,
30
- image_format: str='tiff',
31
- catalog_type: pystac.CatalogType=pystac.CatalogType.SELF_CONTAINED,
32
- item_parser: STACIdParser=StructuredParser
33
- ) -> None:
35
+ def __init__(
36
+ self,
37
+ image_format: str = "tiff",
38
+ catalog_type: pystac.CatalogType = pystac.CatalogType.SELF_CONTAINED,
39
+ item_parser: STACIdParser = StructuredParser,
40
+ assets_generator: STACAssetGenerator = STACAssetGenerator,
41
+ ) -> None:
34
42
  """
35
43
  Initialize the STAC generator
36
-
44
+
37
45
  :param image_format: image format of the assets
38
46
  :param catalog_type: type of the catalog
39
47
  :param item_parser: parser to get the item ID
48
+ :param assets_generator: generator to generate the assets
40
49
  """
41
50
  self._image_format = image_format
42
51
  self._catalog_type = catalog_type
43
52
  self._item_parser = item_parser()
53
+ self._assets_generator = assets_generator()
44
54
  self._extensions_dict: dict = type_stac_extensions_dict
45
- self._validator = StacValidate(extensions=True)
46
- self._stac_dataframe = None
47
-
48
- def generate_stac_metadata(self,
49
- stac_dataframe: pd.DataFrame,
50
- id: str,
51
- description: str,
52
- output_folder: str='stac',
53
- kwargs: dict={}) -> None:
55
+ self._stac_dataframe = pd.DataFrame()
56
+
57
+ def generate_stac_metadata(
58
+ self,
59
+ id: str,
60
+ description: str,
61
+ stac_dataframe: pd.DataFrame = None,
62
+ output_folder: str = "stac",
63
+ kwargs: dict = {},
64
+ ) -> None:
54
65
  """
55
66
  Generate STAC metadata for a given directory containing the assets to generate metadata
56
67
 
57
- :param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
58
68
  :param id: id of the catalog
59
69
  :param description: description of the catalog
70
+ :param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
60
71
  :param output_folder: output folder to write the catalog to
61
72
  """
62
- self._stac_dataframe = stac_dataframe
63
-
73
+ self._stac_dataframe = (
74
+ stac_dataframe if self._stac_dataframe.empty else self._stac_dataframe
75
+ )
76
+ if self._stac_dataframe.empty:
77
+ raise ValueError("No STAC dataframe provided")
78
+
64
79
  # Create an empty catalog
65
- catalog = self.create_stac_catalog(id=id, description=description)
80
+ catalog = pystac.Catalog(id=id, description=description, **kwargs)
66
81
 
67
82
  # Add the collections to the catalog
68
83
  collections = self._stac_dataframe.collection.unique()
@@ -72,69 +87,63 @@ class STACGenerator:
72
87
  collection = self.generate_stac_collection(collection_path)
73
88
  # Add the collection to the catalog
74
89
  catalog.add_child(collection)
75
-
90
+
76
91
  # Add the catalog to the root directory
77
92
  catalog.normalize_hrefs(output_folder)
78
93
 
79
94
  # Validate the catalog
95
+ print("Validating and saving catalog...")
80
96
  try:
81
97
  pystac.validation.validate(catalog)
82
98
  catalog.save(catalog_type=self._catalog_type)
99
+ print("Success!")
83
100
  except pystac.STACValidationError as e:
84
- print(f'Catalog validation error: {e}')
101
+ print(f"Catalog validation error: {e}")
85
102
  return
86
103
 
87
- def get_stac_dataframe(self, path: str, bands: dict=None, extensions: dict=None) -> pd.DataFrame:
104
+ def get_stac_dataframe(self,
105
+ path: str,
106
+ collections: Union[str, dict]='source',
107
+ bands: dict=None,
108
+ extensions: dict=None
109
+ ) -> pd.DataFrame:
88
110
  """
89
111
  Get a dataframe with the STAC metadata of a given directory containing the assets to generate metadata
90
112
 
91
113
  :param path: path to the root directory
114
+ :param collections: dictionary with the collections
115
+ :param bands: dictionary with the bands
92
116
  :param extensions: dictionary with the extensions
93
- :param image_format: image format of the assets
94
117
  """
95
118
  images = glob(str(path) + f'/**/*.{self._image_format}', recursive=True)
96
- images = sample(images, 50) # TODO drop this line
119
+ if self._assets_generator.type == 'Extracted':
120
+ images = cut_images(images)
121
+
97
122
  labels, ixs = self._format_labels(images)
98
- bands = self._get_items_list_from_dict(labels, bands)
99
- exts = self._get_items_list_from_dict(labels, extensions)
100
- collections = self._get_images_common_prefix(images)
123
+ bands_values = self._get_items_list_from_dict(labels, bands)
124
+ extensions_values = self._get_items_list_from_dict(labels, extensions)
125
+
126
+ if collections == "source":
127
+ # List of path with the same value repeated as many times as the number of images
128
+ collections_values = [join(path, "source") for i in range(len(images))]
129
+ else:
130
+ try:
131
+ collections_values = [join(path, value) for value in self._get_items_list_from_dict(labels, collections)]
132
+ except TypeError as e:
133
+ # TODO control this error
134
+ raise TypeError(f'Control this error')
101
135
 
102
136
  df = pd.DataFrame({'image': images,
103
137
  'label': labels,
104
138
  'ix': ixs,
105
- 'collection': collections,
106
- 'extensions': exts,
107
- 'bands': bands})
139
+ 'collection': collections_values,
140
+ 'extensions': extensions_values,
141
+ 'bands': bands_values
142
+ })
108
143
 
109
- return df
110
-
111
- def _get_images_common_prefix(self, images: list) -> list:
112
- """
113
- Get the common prefix of a list of images
114
-
115
- :param images: list of images
116
- """
117
- images_common_prefix_dict = dict()
118
-
119
- images_dirs = [dirname(i) for i in images]
120
-
121
- for image in images_dirs:
122
- path = image
123
- common = False
124
- while not common:
125
- n = count_ocurrences(path, images_dirs)
126
- if n > 1:
127
- images_common_prefix_dict[image] = path
128
- common = True
129
- else:
130
- path = dirname(path)
131
-
132
- images_common_prefix_list = list()
133
- for i in images:
134
- images_common_prefix_list.append(images_common_prefix_dict[dirname(i)])
144
+ self._stac_dataframe = df
135
145
 
136
- return images_common_prefix_list
137
-
146
+ return df
138
147
 
139
148
  def _format_labels(self, images):
140
149
  """
@@ -142,10 +151,10 @@ class STACGenerator:
142
151
 
143
152
  :param images: list of images
144
153
  """
145
- labels = [x.split('/')[-1].split('_')[0].split('.')[0] for x in images]
154
+ labels = [x.split("/")[-1].split("_")[0].split(".")[0] for x in images]
146
155
  ixs = [labels.index(x) for x in labels]
147
156
  return labels, ixs
148
-
157
+
149
158
  def _get_items_list_from_dict(self, labels: list, items: dict) -> list:
150
159
  """
151
160
  Get a list of items from a dictionary
@@ -164,160 +173,52 @@ class STACGenerator:
164
173
  items_list.append(None)
165
174
 
166
175
  return items_list
167
-
168
- def _get_collection_extent(self, path: str) -> pystac.Extent:
169
- """
170
- Get the extent of a collection
171
-
172
- :param path: path to the directory
173
- """
174
- # Get the spatial extent of the collection
175
- spatial_extent = self._get_collection_spatial_extent(path)
176
- # Get the temporal interval of the collection
177
- temporal_interval = self._get_collection_temporal_interval(path)
178
- # Create the Extent object
179
- extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_interval)
180
-
181
- return extent
182
-
183
- def _get_collection_spatial_extent(self, path: str) -> pystac.SpatialExtent:
184
- """
185
- Get the spatial extent of a collection
186
-
187
- :param path: path to the directory
188
- """
189
- # Get the bounding boxes of all the rasters in the path
190
- bboxes = list()
191
- # use glob
192
- rasters = glob(f'{path}/**/*.{self._image_format}', recursive=True)
193
- for raster in rasters:
194
- with rasterio.open(raster) as ds:
195
- bounds = ds.bounds
196
- dst_crs = 'EPSG:4326'
197
- try:
198
- left, bottom, right, top = rasterio.warp.transform_bounds(ds.crs, dst_crs, *bounds)
199
- bbox = [left, bottom, right, top]
200
- except rasterio.errors.CRSError:
201
- spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
202
- return spatial_extent
203
- bboxes.append(bbox)
204
- # Get the minimum and maximum values of the bounding boxes
205
- try:
206
- left = min([bbox[0] for bbox in bboxes])
207
- bottom = min([bbox[1] for bbox in bboxes])
208
- right = max([bbox[2] for bbox in bboxes])
209
- top = max([bbox[3] for bbox in bboxes])
210
- spatial_extent = pystac.SpatialExtent([[left, bottom, right, top]])
211
- except ValueError:
212
- spatial_extent = pystac.SpatialExtent([[0, 0, 0, 0]])
213
- finally:
214
- return spatial_extent
215
-
216
- def _get_collection_temporal_interval(self, path: str) -> pystac.TemporalExtent:
217
- """
218
- Get the temporal interval of a collection
219
176
 
220
- :param path: path to the directory
221
- """
222
- # Get all the metadata.json files in the path
223
- metadata_json_files = glob(f'{path}/**/*.json', recursive=True)
224
- if not metadata_json_files:
225
- return self._get_unknow_temporal_interval()
226
-
227
- # Get the temporal interval of every metadata.json file
228
- temporal_intervals = list()
229
- for metadata_json_file in metadata_json_files:
230
- with open(metadata_json_file, 'r') as f:
231
- metadata = json.load(f)
232
- temporal_intervals.append(metadata['date-adquired']) if metadata['date-adquired'] else None
233
- if temporal_intervals: # TODO control in DEM data
234
- try:
235
- # Get the minimum and maximum values of the temporal intervals
236
- min_date = min([datetime.strptime(interval, '%Y-%m-%d') for interval in temporal_intervals])
237
- max_date = max([datetime.strptime(interval, '%Y-%m-%d') for interval in temporal_intervals])
238
- except ValueError:
239
- min_date = datetime.strptime('2000-01-01', '%Y-%m-%d')
240
- max_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
241
- finally:
242
- # Create the temporal interval
243
- temporal_interval = pystac.TemporalExtent([min_date, max_date])
244
- else:
245
- return self._get_unknow_temporal_interval()
246
-
247
- return temporal_interval
248
-
249
- def _get_unknow_temporal_interval(self) -> pystac.TemporalExtent:
250
- """
251
- Get an unknown temporal interval
252
- """
253
- min_date = datetime.strptime('2000-01-01', '%Y-%m-%d')
254
- max_date = datetime.strptime('2023-12-31', '%Y-%m-%d')
255
-
256
- return pystac.TemporalExtent([min_date, max_date])
257
-
258
- def create_stac_catalog(self, id: str, description: str, kwargs: dict={}) -> pystac.Catalog:
259
- """
260
- Create a STAC catalog
261
-
262
- :param id: id of the catalog
263
- :param description: description of the catalog
264
- :param params: additional parameters
265
- """
266
- return pystac.Catalog(id=id, description=description, **kwargs)
267
-
268
- def generate_stac_collection(self, path: str) -> pystac.Collection:
177
+ def generate_stac_collection(self, collection_path: str) -> pystac.Collection:
269
178
  """
270
179
  Generate a STAC collection from a directory containing the assets to generate metadata
271
180
 
272
- :param path: path to the root directory
181
+ :param collection_path: path to the collection
273
182
  """
183
+ # Get the images of the collection, as they are needed to obtain the collection extent
184
+ collection_images = self._stac_dataframe[
185
+ self._stac_dataframe["collection"] == collection_path
186
+ ]["image"]
274
187
  # Get the collection extent
275
- extent = self._get_collection_extent(path)
188
+ extent = get_collection_extent(collection_images)
276
189
  # Create the collection
277
- collection = pystac.Collection(id=basename(path),
278
- description='Collection',
279
- extent=extent)
280
-
281
- for image in self._stac_dataframe.image:
282
- # Check if the path of the image is a child of the path of the collection
283
- if path in image:
284
- # Create the item
285
- item = self.create_stac_item(image)
286
- # Add the item to the collection
287
- collection.add_item(item)
288
-
190
+ collection_id = basename(collection_path)
191
+ collection = pystac.Collection(
192
+ id=collection_id, description="Collection", extent=extent
193
+ )
194
+
195
+ print(f"Generating {collection_id} collection...")
196
+ for image in tqdm(collection_images):
197
+ # Create the item
198
+ item = self.create_stac_item(image)
199
+ # Add the item to the collection
200
+ collection.add_item(item)
201
+
289
202
  # Return the collection
290
203
  return collection
291
204
 
292
- def create_stac_collection(self, id: str, description: str, extent: pystac.Extent, kwargs: dict={}) -> pystac.Collection:
293
- """
294
- Create a STAC collection
295
-
296
- :param id: id of the collection
297
- :param description: description of the collection
298
- :param extent: extent of the collection
299
- :param params: additional parameters
300
- """
301
- return pystac.Collection(id=id, description=description, extent=extent, **kwargs)
302
-
303
- def create_stac_item(self,
304
- raster_path: str,
305
- kwargs: dict={}
306
- ) -> pystac.Item:
205
+ def create_stac_item(self, raster_path: str, kwargs: dict = {}) -> pystac.Item:
307
206
  """
308
207
  Create a STAC item from a directory containing the raster files and the metadata.json file
309
208
 
310
209
  :param raster_path: path to the raster file
311
210
  """
312
211
  # Check if there is any metadata file in the directory associated to the raster file
313
- metadata = self._get_item_metadata(raster_path)
212
+ metadata = get_item_metadata(raster_path)
314
213
 
315
214
  # Obtain the bounding box from the raster
316
215
  with rasterio.open(raster_path) as ds:
317
216
  bounds = ds.bounds
318
- dst_crs = 'EPSG:4326'
217
+ dst_crs = "EPSG:4326"
319
218
  try:
320
- left, bottom, right, top = rasterio.warp.transform_bounds(ds.crs, dst_crs, *bounds)
219
+ left, bottom, right, top = rasterio.warp.transform_bounds(
220
+ ds.crs, dst_crs, *bounds
221
+ )
321
222
  except rasterio.errors.CRSError:
322
223
  # If the raster has no crs, set the bounding box to 0
323
224
  left, bottom, right, top = 0, 0, 0, 0
@@ -327,116 +228,238 @@ class STACGenerator:
327
228
 
328
229
  # Create geojson feature
329
230
  # If the bounding box has no values, set the geometry to None
330
- geom = mapping(Polygon([
331
- [left, bottom],
332
- [left, top],
333
- [right, top],
334
- [right, bottom]
335
- ]))
231
+ geom = mapping(
232
+ Polygon([[left, bottom], [left, top], [right, top], [right, bottom]])
233
+ )
336
234
 
337
- # Initialize properties
338
- properties = dict()
235
+ # Initialize pySTAC item parameters
236
+ params = dict()
237
+ params["properties"] = dict()
339
238
 
340
239
  # Obtain the date acquired
341
- if metadata and metadata["date-adquired"]:
240
+ start_time, end_time = None, None
241
+ if metadata and metadata["date-adquired"] and metadata["type"] not in ('dem', 'DEM'):
342
242
  time_acquired = format_time_acquired(metadata["date-adquired"])
343
243
  else:
344
- # Set unknown date
345
- time_acquired = datetime.strptime('2000-01-01', '%Y-%m-%d')
346
-
244
+ # Check if the type of the data is DEM
245
+ if metadata and metadata["type"] and metadata["type"] in ("dem", "DEM"):
246
+ time_acquired = None
247
+ start_time = datetime.strptime("2011-01-01", "%Y-%m-%d")
248
+ end_time = datetime.strptime("2015-01-07", "%Y-%m-%d")
249
+ params["start_datetime"] = start_time
250
+ params["end_datetime"] = end_time
251
+ else:
252
+ # Set unknown date
253
+ time_acquired = datetime.strptime("2000-01-01", "%Y-%m-%d")
254
+
347
255
  # Obtain the item ID. The approach depends on the item parser
348
256
  id = self._item_parser.get_item_id(raster_path)
257
+ # Add the item ID to the dataframe, to be able to get it later
258
+ self._stac_dataframe.loc[
259
+ self._stac_dataframe["image"] == raster_path, "id"
260
+ ] = id
349
261
 
350
262
  # Instantiate pystac item
351
- item = pystac.Item(id=id,
352
- geometry=geom,
353
- bbox=bbox,
354
- datetime=time_acquired,
355
- properties=properties,
356
- **kwargs)
357
-
358
- # Get the item extension using the dataframe, from the raster path
359
- extensions = self._stac_dataframe[self._stac_dataframe['image'] == raster_path]['extensions'].values
263
+ item = pystac.Item(
264
+ id=id, geometry=geom, bbox=bbox, datetime=time_acquired, **params
265
+ )
266
+
267
+ # Get the item info, from the raster path
268
+ item_info = self._stac_dataframe[self._stac_dataframe["image"] == raster_path]
269
+ # Get the extensions of the item
270
+ extensions = item_info["extensions"].values
360
271
  extensions = extensions[0] if extensions else None
272
+
361
273
  # Add the required extensions to the item
362
274
  if extensions:
363
275
  if isinstance(extensions, str):
364
276
  extensions = [extensions]
365
277
  for extension in extensions:
366
- extension_obj = self._extensions_dict[extension]
367
- extension_obj.add_extension_to_object(item)
278
+ if extension not in SUPPORTED_EXTENSIONS:
279
+ raise ValueError(f"Extension {extension} not supported")
280
+ else:
281
+ extension_obj = self._extensions_dict[extension]
282
+ extension_obj.add_extension_to_object(item, item_info)
368
283
 
369
284
  # Add the assets to the item
370
- # First of all, we need to get the image bands and extract them from the raster
371
- # in order to create the assets
372
- bands = self._stac_dataframe[self._stac_dataframe['image'] == raster_path]['bands'].values
373
- bands = bands[0] if bands else None
374
- if not bands:
375
- # If there is no bands, create a single band asset from the file, assuming thats a singleband raster
376
- href = basename(raster_path)
377
- title = basename(raster_path).split('.')[0]
378
- asset = pystac.Asset(href=href, title=title, media_type=pystac.MediaType.GEOTIFF)
379
- else:
380
- with rasterio.open(raster_path, 'r') as raster:
381
- # Get the name of the raster file without extension
382
- raster_name = basename(raster_path).split('.')[0]
383
- if isinstance(bands, str):
384
- bands = [bands]
385
- for band in bands:
386
- i = bands.index(band)
387
- try:
388
- single_band = raster.read(i + 1)
389
- except IndexError:
390
- # TODO put try here for IndexError: band index 2 out of range (not in (1,))
391
- # TODO control
392
- single_band = raster.read(1)
393
- band_name = f'{raster_name}_{band}.{self._image_format}'
394
- output_band = join(dirname(raster_path), band_name)
395
- # Copy the metadata
396
- metadata = raster.meta.copy()
397
- metadata.update({"count": 1})
398
- # Write the band to the output folder
399
- with rasterio.open(output_band, "w", **metadata) as dest:
400
- dest.write(single_band, 1)
401
- # Instantiate pystac asset
402
- asset = pystac.Asset(href=band_name, title=band, media_type=pystac.MediaType.GEOTIFF)
403
- # Add the asset to the item
404
- item.add_asset(band_name, asset)
285
+ assets = self._assets_generator.extract_assets(item_info)
286
+ if not assets:
287
+ # If there are not assets using the selected generator, try with the default
288
+ assets = STACAssetGenerator.extract_assets(item_info)
289
+
290
+ # Add the assets to the item
291
+ if assets:
292
+ for asset in assets:
293
+ if isinstance(asset, pystac.Asset):
294
+ item.add_asset(asset.title, asset)
405
295
  # Add the required extensions to the asset if required
406
296
  if extensions:
407
297
  if isinstance(extensions, str):
408
298
  extensions = [extensions]
409
299
  for extension in extensions:
410
- extension_obj = self._extensions_dict[extension]
411
- extension_obj.add_extension_to_object(asset)
300
+ if extension not in SUPPORTED_EXTENSIONS:
301
+ raise ValueError(f"Extension {extension} not supported")
302
+ else:
303
+ extension_obj = self._extensions_dict[extension]
304
+ extension_obj.add_extension_to_object(asset, item_info)
305
+
306
+ item.set_self_href(join(dirname(raster_path), f"{id}.json"))
307
+ item.make_asset_hrefs_relative()
412
308
 
413
-
414
309
  return item
415
310
 
416
- def _get_item_metadata(self, raster_path: str) -> str:
311
+ def generate_stac_labels(
312
+ self,
313
+ catalog: Union[pystac.Catalog, str],
314
+ stac_dataframe: Optional[pd.DataFrame] = None,
315
+ collection: Optional[Union[pystac.Collection, str]] = None,
316
+ ) -> None:
417
317
  """
418
- Get the metadata JSON file of a given directory, associated to a raster file
419
-
420
- :param raster_path: path to the raster file
318
+ Generate a labels collection from a STAC dataframe
319
+
320
+ :param catalog: catalog to add the labels collection to
321
+ :param stac_dataframe: dataframe with the STAC metadata of a given directory containing the assets to generate metadata
322
+ :param collection: collection to add the labels collection to
421
323
  """
422
- # Get the directory of the raster file
423
- raster_dir_path = dirname(raster_path)
424
- # Get the metadata JSON file
425
- # Check if there is a metadata.json file in the directory
426
- if 'metadata.json' in listdir(raster_dir_path):
427
- metadata_json = join(raster_dir_path, 'metadata.json')
324
+ self._stac_dataframe = (
325
+ stac_dataframe if self._stac_dataframe.empty else self._stac_dataframe
326
+ )
327
+ if self._stac_dataframe.empty:
328
+ raise ValueError(
329
+ "No STAC dataframe provided, please provide a STAC dataframe or generate it with <get_stac_dataframe> method"
330
+ )
331
+ if isinstance(catalog, str):
332
+ catalog = pystac.Catalog.from_file(catalog)
333
+
334
+ # Add the labels collection to the catalog
335
+ # If exists a source collection, get it extent
336
+ source_collection = catalog.get_child("source")
337
+ if source_collection:
338
+ extent = source_collection.extent
339
+ source_items = source_collection.get_all_items()
428
340
  else:
429
- # If there is no metadata.json file in the directory, check if there is
430
- # a json file with the same name as the raster file
431
- raster_name = raster_path.split('/')[-1]
432
- raster_name = raster_name.split('.')[0]
433
- metadata_json = join(raster_dir_path, f'{raster_name}.json')
434
- if not exists(metadata_json):
435
- # If there is no metadata.json file in the directory, return None
436
- return None
437
-
438
- # Open the metadata.json file and return it
439
- with open(metadata_json, 'r') as f:
440
- metadata = json.load(f)
341
+ if not collection:
342
+ raise ValueError(
343
+ "No source collection provided, please provide a source collection"
344
+ )
345
+ extent = get_unknow_extent()
346
+
347
+ # Create the labels collection and add it to the catalog if it does not exist
348
+ # If it exists, remove it
349
+ collection = pystac.Collection(id="labels", description="Labels", extent=extent)
350
+ if collection.id in [c.id for c in catalog.get_children()]:
351
+ catalog.remove_child(collection.id)
352
+ catalog.add_child(collection)
353
+
354
+ # Generate the labels items
355
+ print("Generating labels collection...")
356
+ for source_item in tqdm(source_items):
357
+ # There must be an item ID column in the STAC dataframe
358
+ if not 'id' in self._stac_dataframe.columns:
359
+ raise ValueError(
360
+ "No item ID column found in the STAC dataframe, please provide a STAC dataframe with the item ID column"
361
+ )
362
+ label_classes = self._stac_dataframe.label.unique().tolist()
363
+
364
+ # Create the label item
365
+ # TODO put in kwargs
366
+ label_item = LabelExtensionObject.add_extension_to_item(
367
+ source_item,
368
+ label_names=["label"],
369
+ label_classes=[label_classes],
370
+ label_properties=["label"],
371
+ label_description="Item label",
372
+ label_methods=["manual"],
373
+ label_tasks=["classification"],
374
+ label_type="vector"
375
+ )
376
+ # Add the self href to the label item, following the Best Practices Layout
377
+ # https://github.com/radiantearth/stac-spec/blob/master/best-practices.md
378
+ label_item.set_self_href(
379
+ join(
380
+ dirname(collection.get_self_href()),
381
+ label_item.id,
382
+ f"{label_item.id}.json"
383
+ )
384
+ )
385
+ collection.add_item(label_item)
386
+
387
+ # Add the extension to the collection
388
+ # TODO put in kwargs
389
+ LabelExtensionObject.add_extension_to_collection(
390
+ collection,
391
+ label_names=["label"],
392
+ label_classes=[label_classes],
393
+ label_type="vector",
394
+ )
395
+
396
+ # Validate and save the catalog
397
+ # Before adding the geojson, we need to save the catalog
398
+ # and then iterate over the items to add the geojson
399
+ try:
400
+ pystac.validation.validate(catalog)
401
+ catalog.normalize_and_save(dirname(catalog.get_self_href()), self._catalog_type)
402
+ except pystac.STACValidationError as e:
403
+ print(f"Catalog validation error: {e}")
404
+ return
441
405
 
442
- return metadata
406
+ # Add a GeoJSON FeatureCollection to every label item, as recommended by the spec
407
+ # https://github.com/stac-extensions/label#assets
408
+ LabelExtensionObject.add_geojson_to_items(collection,
409
+ self._stac_dataframe)
410
+ catalog.normalize_and_save(dirname(catalog.get_self_href()), self._catalog_type)
411
+
412
+
413
+ def merge_stac_catalogs(catalog_1: Union[pystac.Catalog, str],
414
+ catalog_2: Union[pystac.Catalog, str],
415
+ destination: Optional[str] = None,
416
+ keep_extensions: Optional[bool] = False,
417
+ catalog_type: Optional[pystac.CatalogType] = pystac.CatalogType.SELF_CONTAINED
418
+ ) -> None:
419
+ """
420
+ Merge two STAC catalogs, keeping the properties, collection and items of both catalogs
421
+
422
+ :param catalog_1: first catalog to merge
423
+ :param catalog_2: second catalog to merge
424
+ :param destination: destination folder to save the merged catalog
425
+ :param keep_extensions: keep the extensions of the first catalog
426
+ :param catalog_type: type of the catalog
427
+ """
428
+ if isinstance(catalog_1, str):
429
+ catalog_1 = pystac.Catalog.from_file(catalog_1)
430
+ if isinstance(catalog_2, str):
431
+ catalog_2 = pystac.Catalog.from_file(catalog_2)
432
+
433
+ for col1 in tqdm(catalog_1.get_children(), desc='Merging catalogs...'):
434
+ # Check if the collection exists in catalog_2
435
+ col2 = catalog_2.get_child(col1.id)
436
+ if col2 is None:
437
+ # If it does not exist, add it
438
+ col1_ = col1.clone()
439
+ catalog_2.add_child(col1)
440
+ col2 = catalog_2.get_child(col1.id)
441
+ col2.clear_items()
442
+ for i in col1_.get_all_items():
443
+ col2.add_item(i)
444
+ else:
445
+ # If it exists, merge the items
446
+ for item1 in col1.get_items():
447
+ if col2.get_item(item1.id) is None:
448
+ col2.add_item(item1)
449
+
450
+ if keep_extensions:
451
+ for ext in catalog_1.stac_extensions:
452
+ if ext not in catalog_2.stac_extensions:
453
+ catalog_2.stac_extensions.append(ext)
454
+
455
+ for extra_field_name, extra_field_value in catalog_1.extra_fields.items():
456
+ if extra_field_name not in catalog_2.extra_fields:
457
+ catalog_2.extra_fields[extra_field_name] = extra_field_value
458
+
459
+ if not destination:
460
+ destination = dirname(catalog_2.get_self_href())
461
+ rmtree(destination) # Remove the old catalog and replace it with the new one
462
+ # Save the merged catalog
463
+ print('Validating...')
464
+ catalog_2.normalize_and_save(destination, catalog_type)
465
+ print('Success')