eodash_catalog 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of eodash_catalog might be problematic.

eodash_catalog/__about__.py CHANGED
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2024-present Daniel Santillan <daniel.santillan@eox.at>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.2.1"
+__version__ = "0.3.1"
eodash_catalog/duration.py CHANGED
@@ -34,7 +34,7 @@ def max_days_in_month(year, month):
         return 31
     if month in (4, 6, 9, 11):
         return 30
-    if ((year % 400) == 0) or ((year % 100) != 0) and ((year % 4) == 0):
+    if ((year % 400) == 0) or (((year % 100) != 0) and ((year % 4) == 0)):
         return 29
     return 28
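
Reviewer note: the leap-year change is purely cosmetic. Python's `and` binds tighter than `or`, so the old expression already grouped as intended; the extra parentheses in 0.3.1 just make the Gregorian rule explicit. A standalone sanity check (not from the package):

    def is_leap_year(year: int) -> bool:
        # Gregorian rule: divisible by 400, or by 4 but not by 100.
        return (year % 400 == 0) or ((year % 100 != 0) and (year % 4 == 0))

    assert is_leap_year(2000) and is_leap_year(2024)
    assert not is_leap_year(1900) and not is_leap_year(2023)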
@@ -260,11 +260,10 @@ class Duration:
         the two Durations are considered equal.
         """
         if isinstance(other, Duration):
-            if (self.years * 12 + self.months) == (
-                other.years * 12 + other.months
-            ) and self.tdelta == other.tdelta:
-                return True
-            return False
+            return bool(
+                self.years * 12 + self.months == other.years * 12 + other.months
+                and self.tdelta == other.tdelta
+            )
         # check if other con be compared against timedelta object
         # will raise an AssertionError when optimisation is off
         if self.years == 0 and self.months == 0:
@@ -277,11 +276,10 @@ class Duration:
         the two Durations are considered not equal.
         """
         if isinstance(other, Duration):
-            if (self.years * 12 + self.months) != (
-                other.years * 12 + other.months
-            ) or self.tdelta != other.tdelta:
-                return True
-            return False
+            return bool(
+                self.years * 12 + self.months != other.years * 12 + other.months
+                or self.tdelta != other.tdelta
+            )
         # check if other can be compared against timedelta object
         # will raise an AssertionError when optimisation is off
         if self.years == 0 and self.months == 0:
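
Reviewer note: both comparison rewrites are behavior-preserving; `return bool(expr)` collapses the old `if expr: return True / return False` pattern. Durations still compare by total months plus the timedelta part, so for example (minimal stand-in class, not the real Duration):

    from datetime import timedelta

    class MiniDuration:
        # Mirrors only the comparison logic shown in the hunk above.
        def __init__(self, years=0, months=0, tdelta=timedelta(0)):
            self.years, self.months, self.tdelta = years, months, tdelta

        def __eq__(self, other):
            return bool(
                self.years * 12 + self.months == other.years * 12 + other.months
                and self.tdelta == other.tdelta
            )

    assert MiniDuration(years=1) == MiniDuration(months=12)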
eodash_catalog/endpoints.py CHANGED
@@ -27,15 +27,16 @@ from eodash_catalog.stac_handling import (
 from eodash_catalog.thumbnails import generate_thumbnail
 from eodash_catalog.utils import (
     Options,
-    create_geojson_from_bbox,
+    create_geometry_from_bbox,
     filter_time_entries,
     format_datetime_to_isostring_zulu,
     generate_veda_cog_link,
-    merge_bboxes,
     parse_datestring_to_tz_aware_datetime,
     replace_with_env_variables,
     retrieveExtentFromWCS,
     retrieveExtentFromWMSWMTS,
+    save_items,
+    update_extents_from_collection_children,
 )
 
 LOGGER = get_logger(__name__)
@@ -75,13 +76,18 @@ def process_WCS_rasdaman_Endpoint(
 
 
 def process_STAC_Datacube_Endpoint(
-    catalog_config: dict, endpoint_config: dict, collection_config: dict, catalog: Catalog
+    catalog_config: dict,
+    endpoint_config: dict,
+    collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
+    catalog: Catalog,
+    options: Options,
 ) -> Collection:
     collection = get_or_create_collection(
         catalog, collection_config["Name"], collection_config, catalog_config, endpoint_config
     )
     add_visualization_info(collection, collection_config, endpoint_config)
-
+    coll_path_rel_to_root_catalog = f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
     stac_endpoint_url = endpoint_config["EndPoint"]
     if endpoint_config.get("Name") == "xcube":
         stac_endpoint_url = stac_endpoint_url + endpoint_config.get("StacEndpoint", "")
@@ -114,7 +120,7 @@ def process_STAC_Datacube_Endpoint(
     # optionally subset time results based on config
     if query := endpoint_config.get("Query"):
         datetimes = filter_time_entries(datetimes, query)
-
+    items = []
     for dt in datetimes:
         new_item = Item(
             id=format_datetime_to_isostring_zulu(dt),
@@ -122,18 +128,25 @@ def process_STAC_Datacube_Endpoint(
             properties={},
             geometry=item.geometry,
             datetime=dt,
+            assets={"dummy_asset": Asset(href="")},
         )
         add_visualization_info(new_item, collection_config, endpoint_config)
-        link = collection.add_item(new_item)
-        # bubble up information we want to the link
-        link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
+        items.append(new_item)
 
+    save_items(
+        collection,
+        items,
+        options.outputpath,
+        catalog_config["id"],
+        coll_path_rel_to_root_catalog,
+        options.gp,
+    )
     unit = variables.get(endpoint_config.get("Variable")).get("unit")
     if unit and "yAxis" not in collection_config:
         collection_config["yAxis"] = unit
-    if datetimes:
+    if datetimes and not options.gp:
         collection.update_extent_from_items()
-    else:
+    elif not datetimes:
         LOGGER.warn(f"NO datetimes returned for collection: {collection_id}!")
 
     add_collection_information(catalog_config, collection, collection_config)
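
Reviewer note: this hunk introduces the collect-then-save pattern that 0.3.1 applies to every endpoint handler: items are accumulated in a list and handed to the new save_items helper (defined in utils.py further down), which either adds them to the collection as before or, when the geoparquet option (options.gp) is set, writes them as a single GeoParquet file. The empty `dummy_asset` is attached presumably because the GeoParquet conversion expects each item to carry at least one asset; the removed to_stac_geoparquet helper in utils.py used to patch this in after the fact. Schematically:

    # Schematic of the recurring 0.3.1 pattern (make_item is a hypothetical helper):
    items = []
    for dt in datetimes:
        items.append(make_item(dt))
    save_items(
        collection, items, options.outputpath,
        catalog_config["id"], coll_path_rel_to_root_catalog, options.gp,
    )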
@@ -145,19 +158,23 @@ def handle_STAC_based_endpoint(
     catalog_config: dict,
     endpoint_config: dict,
     collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
     catalog: Catalog,
     options: Options,
     headers=None,
 ) -> Collection:
+    coll_path_rel_to_root_catalog = f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
     if collection_config.get("Locations"):
         root_collection = get_or_create_collection(
             catalog, collection_config["Name"], collection_config, catalog_config, endpoint_config
         )
         for location in collection_config["Locations"]:
+            identifier = location.get("Identifier", uuid.uuid4())
             collection = process_STACAPI_Endpoint(
                 catalog_config=catalog_config,
                 endpoint_config=endpoint_config,
                 collection_config=collection_config,
+                coll_path_rel_to_root_catalog=f"{coll_path_rel_to_root_catalog}/{identifier}",
                 catalog=catalog,
                 options=options,
                 headers=headers,
@@ -168,7 +185,7 @@ def handle_STAC_based_endpoint(
             # Update identifier to use location as well as title
             # TODO: should we use the name as id? it provides much more
             # information in the clients
-            collection.id = location.get("Identifier", uuid.uuid4())
+            collection.id = identifier
             collection.title = location.get("Name")
             # See if description should be overwritten
             if location.get("Description"):
@@ -192,11 +209,7 @@ def handle_STAC_based_endpoint(
                     location["OverwriteBBox"],
                 ]
             )
-        root_collection.update_extent_from_items()
-        # Add bbox extents from children
-        for c_child in root_collection.get_children():
-            if isinstance(c_child, Collection):
-                root_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
+        update_extents_from_collection_children(root_collection)
     else:
         bbox = None
         if endpoint_config.get("Bbox"):
@@ -205,6 +218,7 @@ def handle_STAC_based_endpoint(
             catalog_config=catalog_config,
             endpoint_config=endpoint_config,
             collection_config=collection_config,
+            coll_path_rel_to_root_catalog=coll_path_rel_to_root_catalog,
             catalog=catalog,
             options=options,
             headers=headers,
@@ -221,6 +235,7 @@ def process_STACAPI_Endpoint(
     catalog_config: dict,
     endpoint_config: dict,
     collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
     catalog: Catalog,
     options: Options,
     headers: dict[str, str] | None = None,
@@ -251,9 +266,8 @@ def process_STACAPI_Endpoint(
     )
     # We keep track of potential duplicate times in this list
     added_times = {}
-    any_item_added = False
+    items = []
    for item in results.items():
-        any_item_added = True
         item_datetime = item.get_datetime()
         if item_datetime is not None:
             iso_date = item_datetime.isoformat()[:10]
@@ -263,7 +277,6 @@ def process_STACAPI_Endpoint(
             if iso_date in added_times:
                 continue
             added_times[iso_date] = True
-        link = collection.add_item(item)
         if options.tn:
             if item.assets.get("cog_default"):
                 generate_thumbnail(
@@ -274,12 +287,10 @@ def process_STACAPI_Endpoint(
         # Check if we can create visualization link
         if endpoint_config.get("Assets"):
             add_visualization_info(item, collection_config, endpoint_config, item.id)
-            link.extra_fields["item"] = item.id
         elif item.assets.get("cog_default"):
             add_visualization_info(
                 item, collection_config, endpoint_config, item.assets["cog_default"].href
             )
-            link.extra_fields["cog_href"] = item.assets["cog_default"].href
         elif item_datetime:
             add_visualization_info(
                 item, collection_config, endpoint_config, datetimes=[item_datetime]
@@ -298,23 +309,31 @@ def process_STACAPI_Endpoint(
         if root_collection:
             item.set_collection(root_collection)
 
-        # bubble up information we want to the link
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(item_datetime)
-        else:
-            link.extra_fields["start_datetime"] = format_datetime_to_isostring_zulu(
-                parse_datestring_to_tz_aware_datetime(item.properties["start_datetime"])
-            )
-            link.extra_fields["end_datetime"] = format_datetime_to_isostring_zulu(
-                parse_datestring_to_tz_aware_datetime(item.properties["end_datetime"])
-            )
         add_projection_info(
             endpoint_config,
             item,
         )
-    if any_item_added:
-        collection.update_extent_from_items()
+        # we check if the item has any assets, if not we create a dummy asset
+        if not item.assets:
+            item.assets["dummy_asset"] = Asset(href="")
+        if "cog_default" in item.assets and item.assets["cog_default"].extra_fields.get(
+            "raster:bands"
+        ):
+            # saving via pyarrow does not work well with statistics ranges
+            # Integer value -10183824872833024 is outside of the range exactly
+            # representable by a IEEE 754 double precision value
+            item.assets["cog_default"].extra_fields.pop("raster:bands")
+        items.append(item)
+
+    if len(items) > 0:
+        save_items(
+            collection,
+            items,
+            options.outputpath,
+            catalog_config["id"],
+            coll_path_rel_to_root_catalog,
+            options.gp,
+        )
     else:
         LOGGER.warn(
             f"""NO items returned for
@@ -339,11 +358,17 @@ def handle_VEDA_endpoint(
     catalog_config: dict,
     endpoint_config: dict,
     collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
     catalog: Catalog,
     options: Options,
 ) -> Collection:
     collection = handle_STAC_based_endpoint(
-        catalog_config, endpoint_config, collection_config, catalog, options
+        catalog_config,
+        endpoint_config,
+        collection_config,
+        coll_path_rel_to_root_catalog,
+        catalog,
+        options,
     )
     return collection
 
@@ -373,12 +398,18 @@ def handle_collection_only(
 
 
 def handle_SH_WMS_endpoint(
-    catalog_config: dict, endpoint_config: dict, collection_config: dict, catalog: Catalog
+    catalog_config: dict,
+    endpoint_config: dict,
+    collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
+    catalog: Catalog,
+    options: Options,
 ) -> Collection:
     # create collection and subcollections (based on locations)
     root_collection = get_or_create_collection(
         catalog, collection_config["Name"], collection_config, catalog_config, endpoint_config
     )
+    coll_path_rel_to_root_catalog = f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
     if collection_config.get("Locations"):
         for location in collection_config["Locations"]:
             # create and populate location collections based on times
@@ -391,23 +422,31 @@ def handle_SH_WMS_endpoint(
                 catalog, location["Identifier"], location_config, catalog_config, endpoint_config
             )
             collection.extra_fields["endpointtype"] = endpoint_config["Name"]
+            items = []
             for time_string in location["Times"]:
                 dt = parse_datestring_to_tz_aware_datetime(time_string)
                 item = Item(
                     id=format_datetime_to_isostring_zulu(dt),
                     bbox=location["Bbox"],
                     properties={},
-                    geometry=None,
+                    geometry=create_geometry_from_bbox(location["Bbox"]),
                     datetime=dt,
                     stac_extensions=[
                         "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
                     ],
+                    assets={"dummy_asset": Asset(href="")},
                 )
                 add_projection_info(endpoint_config, item)
                 add_visualization_info(item, collection_config, endpoint_config, datetimes=[dt])
-                item_link = collection.add_item(item)
-                item_link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
-
+                items.append(item)
+            save_items(
+                collection,
+                items,
+                options.outputpath,
+                catalog_config["id"],
+                f"{coll_path_rel_to_root_catalog}/{collection.id}",
+                options.gp,
+            )
             link = root_collection.add_child(collection)
             # bubble up information we want to the link
             latlng = "{},{}".format(location["Point"][1], location["Point"][0]).strip()
@@ -415,38 +454,47 @@ def handle_SH_WMS_endpoint(
             link.extra_fields["latlng"] = latlng
             link.extra_fields["country"] = location["Country"]
             link.extra_fields["city"] = location["Name"]
-            if location["Times"]:
+            if location["Times"] and not options.gp:
                 collection.update_extent_from_items()
-            else:
+            elif not location["Times"]:
                 LOGGER.warn(f"NO datetimes configured for collection: {collection_config['Name']}!")
             add_visualization_info(collection, collection_config, endpoint_config)
             add_process_info_child_collection(collection, catalog_config, collection_config)
-
-        root_collection.update_extent_from_items()
-        # Add bbox extents from children
-        for c_child in root_collection.get_children():
-            if isinstance(c_child, Collection):
-                root_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
+        update_extents_from_collection_children(root_collection)
     else:
         # if locations are not provided, treat the collection as a
         # general proxy to the sentinel hub layer
         datetimes = get_collection_datetimes_from_config(endpoint_config)
         bbox = endpoint_config.get("Bbox", [-180, -85, 180, 85])
+        items = []
         for dt in datetimes:
             item = Item(
                 id=format_datetime_to_isostring_zulu(dt),
                 bbox=bbox,
                 properties={},
-                geometry=None,
+                geometry=create_geometry_from_bbox(bbox),
                 datetime=dt,
                 stac_extensions=[
                     "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
                 ],
+                assets={"dummy_asset": Asset(href="")},
             )
             add_projection_info(endpoint_config, item)
             add_visualization_info(item, collection_config, endpoint_config, datetimes=[dt])
-            item_link = root_collection.add_item(item)
-            item_link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
+            items.append(item)
+        save_items(
+            root_collection,
+            items,
+            options.outputpath,
+            catalog_config["id"],
+            coll_path_rel_to_root_catalog,
+            options.gp,
+        )
+        # set spatial extent from config
+        root_collection.extent.spatial.bboxes = [bbox]
+        # set time extent from geodb
+        time_extent = [min(datetimes), max(datetimes)]
+        root_collection.extent.temporal = TemporalExtent([time_extent])
     # eodash v4 compatibility
     add_collection_information(catalog_config, root_collection, collection_config, True)
     add_visualization_info(root_collection, collection_config, endpoint_config)
@@ -454,13 +502,20 @@ def handle_SH_WMS_endpoint(
 
 
 def handle_xcube_endpoint(
-    catalog_config: dict, endpoint_config: dict, collection_config: dict, catalog: Catalog
+    catalog_config: dict,
+    endpoint_config: dict,
+    collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
+    catalog: Catalog,
+    options: Options,
 ) -> Collection:
     collection = process_STAC_Datacube_Endpoint(
         catalog_config=catalog_config,
         endpoint_config=endpoint_config,
         collection_config=collection_config,
         catalog=catalog,
+        options=options,
+        coll_path_rel_to_root_catalog=coll_path_rel_to_root_catalog,
     )
 
     add_example_info(collection, collection_config, endpoint_config, catalog_config)
@@ -468,23 +523,33 @@ def handle_xcube_endpoint(
 
 
 def handle_rasdaman_endpoint(
-    catalog_config: dict, endpoint_config: dict, collection_config: dict, catalog: Catalog
+    catalog_config: dict,
+    endpoint_config: dict,
+    collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
+    catalog: Catalog,
 ) -> Collection:
     collection = process_WCS_rasdaman_Endpoint(
-        catalog_config, endpoint_config, collection_config, catalog
+        catalog_config, endpoint_config, collection_config, coll_path_rel_to_root_catalog, catalog
     )
     # add_example_info(collection, collection_config, endpoint_config, catalog_config)
     return collection
 
 
 def handle_GeoDB_endpoint(
-    catalog_config: dict, endpoint_config: dict, collection_config: dict, catalog: Catalog
+    catalog_config: dict,
+    endpoint_config: dict,
+    collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
+    catalog: Catalog,
+    options: Options,
 ) -> Collection:
     # ID of collection is data["Name"] instead of CollectionId to be able to
     # create more STAC collections from one geoDB table
     collection = get_or_create_collection(
         catalog, collection_config["Name"], collection_config, catalog_config, endpoint_config
     )
+    coll_path_rel_to_root_catalog = f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
     select = "?select=aoi,aoi_id,country,city,time,input_data,sub_aoi"
     url = (
         endpoint_config["EndPoint"]
@@ -538,6 +603,7 @@ def handle_GeoDB_endpoint(
             collection, key, sc_config, catalog_config, endpoint_config
         )
         if input_data:
+            items = []
             for v in values:
                 # add items based on inputData fields for each time step available in values
                 first_match = next(
@@ -548,14 +614,20 @@ def handle_GeoDB_endpoint(
                 if "sub_aoi" in v and v["sub_aoi"] != "/":
                     # create geometry from wkt
                     geometry = mapping(wkt.loads(v["sub_aoi"]))
+                    # converting multipolygon to polygon to avoid shapely throwing an exception
+                    # in collection extent from geoparquet table generation
+                    # while trying to create a multipolygon extent of all multipolygons
+                    if geometry["type"] == "MultiPolygon":
+                        geometry = {"type": "Polygon", "coordinates": geometry["coordinates"][0]}
                 else:
-                    geometry = create_geojson_from_bbox(bbox)
+                    geometry = create_geometry_from_bbox(bbox)
                 item = Item(
                     id=v["time"],
                     bbox=bbox,
                     properties={},
                     geometry=geometry,
                     datetime=time_object,
+                    assets={"dummy_asset": Asset(href="")},
                 )
                 if first_match:
                     match first_match["Type"]:
@@ -591,15 +663,15 @@ def handle_GeoDB_endpoint(
                                extra_fields=extra_fields,
                            )
                            item.add_link(link)
-                itemlink = locations_collection.add_item(item)
-                itemlink.extra_fields["datetime"] = (
-                    f"{format_datetime_to_isostring_zulu(time_object)}Z"
-                )
-
-                # add_visualization_info(
-                #     item, collection_config, endpoint_config, file_url=first_match.get("FileUrl")
-                # )
-            locations_collection.update_extent_from_items()
+                items.append(item)
+            save_items(
+                locations_collection,
+                items,
+                options.outputpath,
+                catalog_config["id"],
+                f"{coll_path_rel_to_root_catalog}/{locations_collection.id}",
+                options.gp,
+            )
         else:
             # set spatial extent from geodb
             locations_collection.extent.spatial.bboxes = [bbox]
@@ -631,32 +703,9 @@ def handle_GeoDB_endpoint(
     add_collection_information(catalog_config, collection, collection_config)
     add_example_info(collection, collection_config, endpoint_config, catalog_config)
     collection.extra_fields["locations"] = True
-    if not input_data:
-        # we have no items, extents of collection need to be updated manually
-        merged_bbox = merge_bboxes(
-            [
-                c_child.extent.spatial.bboxes[0]
-                for c_child in collection.get_children()
-                if isinstance(c_child, Collection)
-            ]
-        )
-        collection.extent.spatial.bboxes = [merged_bbox]
-        # Add bbox extents from children
-        for c_child in collection.get_children():
-            if isinstance(c_child, Collection) and merged_bbox != c_child.extent.spatial.bboxes[0]:
-                collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
-        # set time extent of collection
-        individual_datetimes = []
-        for c_child in collection.get_children():
-            if isinstance(c_child, Collection) and isinstance(
-                c_child.extent.temporal.intervals[0], list
-            ):
-                individual_datetimes.extend(c_child.extent.temporal.intervals[0])  # type: ignore
-        time_extent = [min(individual_datetimes), max(individual_datetimes)]
-        collection.extent.temporal = TemporalExtent([time_extent])
-    else:
-        # we can update from items
-        collection.update_extent_from_items()
+
+    update_extents_from_collection_children(collection)
+
     collection.summaries = Summaries(
         {
             "cities": cities,
@@ -670,6 +719,7 @@ def handle_SH_endpoint(
     catalog_config: dict,
     endpoint_config: dict,
     collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
     catalog: Catalog,
     options: Options,
 ) -> Collection:
@@ -682,7 +732,13 @@ def handle_SH_endpoint(
         endpoint_config["Type"] + "-" + endpoint_config["CollectionId"]
     )
     collection = handle_STAC_based_endpoint(
-        catalog_config, endpoint_config, collection_config, catalog, options, headers
+        catalog_config,
+        endpoint_config,
+        collection_config,
+        coll_path_rel_to_root_catalog,
+        catalog,
+        options,
+        headers,
     )
     return collection
 
@@ -691,12 +747,15 @@ def handle_WMS_endpoint(
     catalog_config: dict,
     endpoint_config: dict,
     collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
     catalog: Catalog,
+    options: Options,
     wmts: bool = False,
 ) -> Collection:
     collection = get_or_create_collection(
         catalog, collection_config["Name"], collection_config, catalog_config, endpoint_config
     )
+    coll_path_rel_to_root_catalog = f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
     datetimes = get_collection_datetimes_from_config(endpoint_config)
     spatial_extent = collection.extent.spatial.to_dict().get("bbox", [-180, -90, 180, 90])[0]
     if endpoint_config.get("Type") != "OverwriteTimes" or not endpoint_config.get("OverwriteBBox"):
@@ -712,6 +771,10 @@ def handle_WMS_endpoint(
     # optionally filter time results
     if query := endpoint_config.get("Query"):
         datetimes = filter_time_entries(datetimes, query)
+
+    # we first collect the items and then decide if to save as geoparquet or individual items
+    items = []
+
     # Create an item per time to allow visualization in stac clients
     if len(datetimes) > 0:
         for dt in datetimes:
@@ -719,20 +782,29 @@ def handle_WMS_endpoint(
                 id=format_datetime_to_isostring_zulu(dt),
                 bbox=spatial_extent,
                 properties={},
-                geometry=None,
+                geometry=create_geometry_from_bbox(spatial_extent),
                 datetime=dt,
                 stac_extensions=[
                     "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
                 ],
+                assets={"dummy_asset": Asset(href="")},
             )
             add_projection_info(endpoint_config, item)
             add_visualization_info(item, collection_config, endpoint_config, datetimes=[dt])
-            link = collection.add_item(item)
-            link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
-        collection.update_extent_from_items()
+            items.append(item)
     else:
         LOGGER.warn(f"NO datetimes returned for collection: {collection_config['Name']}!")
 
+    # Save items either into collection as individual items or as geoparquet
+    save_items(
+        collection,
+        items,
+        options.outputpath,
+        catalog_config["id"],
+        coll_path_rel_to_root_catalog,
+        options.gp,
+    )
+
     # Check if we should overwrite bbox
     if endpoint_config.get("OverwriteBBox"):
         collection.extent.spatial = SpatialExtent(
@@ -758,7 +830,11 @@ def generate_veda_tiles_link(endpoint_config: dict, item: str | None) -> str:
     if endpoint_config.get("NoData"):
         no_data = "&no_data={}".format(endpoint_config["NoData"])
     item = item if item else "{item}"
-    target_url = f"https://openveda.cloud/api/raster/collections/{collection}/items/{item}/tiles/WebMercatorQuad/{{z}}/{{x}}/{{y}}?{assets}{color_formula}{no_data}"
+    target_url_base = endpoint_config["EndPoint"].replace("/stac/", "")
+    target_url = (
+        f"{target_url_base}/raster/collections/{collection}/items/{item}"
+        f"/tiles/WebMercatorQuad/{{z}}/{{x}}/{{y}}?{assets}{color_formula}{no_data}"
+    )
     return target_url
 
 
@@ -1071,12 +1147,16 @@ def handle_raw_source(
     catalog_config: dict,
     endpoint_config: dict,
     collection_config: dict,
+    coll_path_rel_to_root_catalog: str,
     catalog: Catalog,
+    options: Options,
 ) -> Collection:
     collection = get_or_create_collection(
         catalog, collection_config["Name"], collection_config, catalog_config, endpoint_config
     )
+    coll_path_rel_to_root_catalog = f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
     if len(endpoint_config.get("TimeEntries", [])) > 0:
+        items = []
         style_link = None
         for time_entry in endpoint_config["TimeEntries"]:
             assets = {}
@@ -1099,7 +1179,7 @@ def handle_raw_source(
                 id=format_datetime_to_isostring_zulu(dt),
                 bbox=bbox,
                 properties={},
-                geometry=create_geojson_from_bbox(bbox)["features"][0]["geometry"],
+                geometry=create_geometry_from_bbox(bbox),
                 datetime=dt,
                 assets=assets,
                 extra_fields={},
@@ -1125,13 +1205,19 @@ def handle_raw_source(
                 },
             )
             item.add_link(style_link)
-            link = collection.add_item(item)
-            link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
-            link.extra_fields["assets"] = [a["File"] for a in time_entry["Assets"]]
+            items.append(item)
+
+        save_items(
+            collection,
+            items,
+            options.outputpath,
+            catalog_config["id"],
+            coll_path_rel_to_root_catalog,
+            options.gp,
+        )
         # eodash v4 compatibility, adding last referenced style to collection
         if style_link:
             collection.add_link(style_link)
-        collection.update_extent_from_items()
     else:
         LOGGER.warn(f"NO datetimes configured for collection: {collection_config['Name']}!")
 
eodash_catalog/generate_indicators.py CHANGED
@@ -6,12 +6,11 @@ Indicator generator to harvest information from endpoints and generate catalog
 
 import os
 import time
-from datetime import datetime
 from typing import Any
 
 import click
 from dotenv import load_dotenv
-from pystac import Catalog, CatalogType, Collection, Link, Summaries, TemporalExtent
+from pystac import Catalog, CatalogType, Collection, Link, Summaries
 from pystac.layout import TemplateLayoutStrategy
 from pystac.validation import validate_all
 from structlog import get_logger
@@ -41,10 +40,10 @@ from eodash_catalog.utils import (
     RaisingThread,
     add_single_item_if_collection_empty,
     iter_len_at_least,
-    merge_bboxes,
     read_config_file,
     recursive_save,
     retry,
+    update_extents_from_collection_children,
 )
 
 # make sure we are loading the env local definition
@@ -99,10 +98,10 @@ def process_catalog_file(file_path: str, options: Options):
 
     LOGGER.info("Started creation of collection files")
     start = time.time()
-    if options.ni or options.gp:
+    if options.ni:
         catalog_self_href = f'{options.outputpath}/{catalog_config["id"]}'
         catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
-        recursive_save(catalog, options.ni, options.gp)
+        recursive_save(catalog, options.ni)
     else:
         # For full catalog save with items this still seems to be faster
         catalog_self_href = catalog_config.get(
@@ -170,6 +169,7 @@ def process_indicator_file(
         catalog, indicator_config["Name"], indicator_config, catalog_config, {}
     )
     if indicator_config.get("Collections"):
+        coll_path_rel_to_root_catalog = indicator_config["Name"]
         for collection in indicator_config["Collections"]:
             process_collection_file(
                 catalog_config,
@@ -177,6 +177,7 @@ def process_indicator_file(
                 parent_indicator,
                 options,
                 "Disable" in indicator_config and collection in indicator_config["Disable"],
+                coll_path_rel_to_root_catalog,
             )
     else:
         # we assume that collection files can also be loaded directly
@@ -186,29 +187,7 @@ def process_indicator_file(
         parent_indicator.update_extent_from_items()
     # get shared extent of all of the collections
     # they might have OverwriteBBox and that would discard it for indicator
-    merged_bbox = merge_bboxes(
-        [
-            c_child.extent.spatial.bboxes[0]
-            for c_child in parent_indicator.get_children()
-            if isinstance(c_child, Collection)
-        ]
-    )
-    parent_indicator.extent.spatial.bboxes = [merged_bbox]
-    # Add bbox extents from children
-    for c_child in parent_indicator.get_children():
-        if isinstance(c_child, Collection) and merged_bbox != c_child.extent.spatial.bboxes[0]:
-            parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
-    # aggregate all time extents from the child collections and make it a indicator extent
-    individual_datetimes: list[datetime] = []
-    for c_child in parent_indicator.get_children():
-        if isinstance(c_child, Collection) and isinstance(
-            c_child.extent.temporal.intervals[0], list
-        ):
-            individual_datetimes.extend(c_child.extent.temporal.intervals[0])  # type: ignore
-    # filter out None
-    individual_datetimes = list(filter(lambda x: x is not None, individual_datetimes))
-    time_extent = [min(individual_datetimes), max(individual_datetimes)]
-    parent_indicator.extent.temporal = TemporalExtent([time_extent])
+    update_extents_from_collection_children(parent_indicator)
     # extract collection information and add it to summary indicator level
     extract_indicator_info(parent_indicator)
     add_process_info(parent_indicator, catalog_config, indicator_config)
@@ -224,49 +203,95 @@ def process_collection_file(
     catalog: Catalog | Collection,
     options: Options,
     disable=False,
+    coll_path_rel_to_root_catalog: str = "",
 ):
     LOGGER.info(f"Processing collection: {file_path}")
     collection_config = read_config_file(file_path)
+    if not coll_path_rel_to_root_catalog:
+        # case when a single collection made the indicator
+        coll_path_rel_to_root_catalog = collection_config["Name"]
     if collection_config.get("Resources"):
         for endpoint_config in collection_config["Resources"]:
             try:
                 collection = None
                 if endpoint_config["Name"] == "Sentinel Hub":
                     collection = handle_SH_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog, options
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                     )
                 elif endpoint_config["Name"] == "Sentinel Hub WMS":
                     collection = handle_SH_WMS_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                    )
                 elif endpoint_config["Name"] == "GeoDB":
                     collection = handle_GeoDB_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                     )
                 elif endpoint_config["Name"] == "VEDA":
                     collection = handle_VEDA_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog, options
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                     )
                 elif endpoint_config["Name"] == "marinedatastore":
                     collection = handle_WMS_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog, wmts=True
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
+                        wmts=True,
                     )
                 elif endpoint_config["Name"] == "xcube":
                     collection = handle_xcube_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                     )
                 elif endpoint_config["Name"] == "rasdaman":
                     collection = handle_rasdaman_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog
+                        catalog_config, endpoint_config, collection_config, catalog, options
                     )
                 elif endpoint_config["Name"] == "WMS":
                     collection = handle_WMS_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                     )
                 elif endpoint_config["Name"] == "JAXA_WMTS_PALSAR":
                     # somewhat one off creation of individual WMTS layers as individual items
                     collection = handle_WMS_endpoint(
-                        catalog_config, endpoint_config, collection_config, catalog, wmts=True
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
+                        wmts=True,
                     )
                 elif endpoint_config["Name"] == "Collection-only":
                     collection = handle_collection_only(
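
Reviewer note: the rasdaman branch looks inconsistent with the new handle_rasdaman_endpoint signature shown earlier, which may be worth verifying upstream:

    # New signature (from the hunk above):
    #   handle_rasdaman_endpoint(catalog_config, endpoint_config, collection_config,
    #                            coll_path_rel_to_root_catalog, catalog)
    # New call site (this hunk):
    #   handle_rasdaman_endpoint(catalog_config, endpoint_config, collection_config,
    #                            catalog, options)
    # Passed positionally, `catalog` would land in coll_path_rel_to_root_catalog
    # and `options` in catalog.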
@@ -285,12 +310,19 @@ def process_collection_file(
                     "FlatGeobuf source",
                 ]:
                     collection = handle_raw_source(
-                        catalog_config, endpoint_config, collection_config, catalog
+                        catalog_config,
+                        endpoint_config,
+                        collection_config,
+                        coll_path_rel_to_root_catalog,
+                        catalog,
+                        options,
                     )
                 else:
                     raise ValueError("Type of Resource is not supported")
                 if collection:
-                    add_single_item_if_collection_empty(endpoint_config, collection)
+                    # check if geoparquet flag is used, as these collections have no items
+                    if not options.gp:
+                        add_single_item_if_collection_empty(endpoint_config, collection)
                     add_projection_info(endpoint_config, collection)
                     add_to_catalog(collection, catalog, endpoint_config, collection_config, disable)
                 else:
@@ -304,7 +336,9 @@ def process_collection_file(
         parent_collection = get_or_create_collection(
             catalog, collection_config["Name"], collection_config, catalog_config, {}
         )
-
+        coll_path_rel_to_root_catalog = (
+            f'{coll_path_rel_to_root_catalog}/{collection_config["Name"]}'
+        )
         locations = []
         countries = []
         for sub_coll_def in collection_config["Subcollections"]:
  for sub_coll_def in collection_config["Subcollections"]:
@@ -316,11 +350,16 @@ def process_collection_file(
316
350
  countries.extend(sub_coll_def["Country"])
317
351
  else:
318
352
  countries.append(sub_coll_def["Country"])
353
+ coll_path_rel_to_root_catalog = (
354
+ f"{coll_path_rel_to_root_catalog}/{sub_coll_def['Collection']}"
355
+ )
319
356
  process_collection_file(
320
357
  catalog_config,
321
358
  "{}/{}".format(options.collectionspath, sub_coll_def["Collection"]),
322
359
  parent_collection,
323
360
  options,
361
+ False,
362
+ coll_path_rel_to_root_catalog,
324
363
  )
325
364
  # find link in parent collection to update metadata
326
365
  for link in parent_collection.links:
@@ -345,11 +384,16 @@ def process_collection_file(
             else:
                 # create temp catalog to save collection
                 tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
+                coll_path_rel_to_root_catalog = (
+                    f"{coll_path_rel_to_root_catalog}/{sub_coll_def['Collection']}"
+                )
                 process_collection_file(
                     catalog_config,
                     "{}/{}".format(options.collectionspath, sub_coll_def["Collection"]),
                     tmp_catalog,
                     options,
+                    None,
+                    coll_path_rel_to_root_catalog,
                 )
                 links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links()  # type: ignore
                 for link in links:
366
410
 
367
411
  add_collection_information(catalog_config, parent_collection, collection_config, True)
368
412
  add_process_info(parent_collection, catalog_config, collection_config)
369
- parent_collection.update_extent_from_items()
370
- # Add bbox extents from children
371
- for c_child in parent_collection.get_children():
372
- if isinstance(c_child, Collection):
373
- parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
413
+ update_extents_from_collection_children(parent_collection)
374
414
  # Fill summaries for locations
375
415
  parent_collection.summaries = Summaries(
376
416
  {
eodash_catalog/utils.py CHANGED
@@ -11,14 +11,17 @@ from decimal import Decimal
 from functools import reduce, wraps
 from typing import Any
 
+import pyarrow.compute as pc
 import stac_geoparquet as stacgp
 import yaml
 from dateutil import parser
 from owslib.wcs import WebCoverageService
 from owslib.wms import WebMapService
 from owslib.wmts import WebMapTileService
-from pystac import Asset, Catalog, Collection, Item, Link, RelType
+from pystac import Asset, Catalog, Collection, Item, Link, RelType, SpatialExtent, TemporalExtent
 from pytz import timezone as pytztimezone
+from shapely import geometry as sgeom
+from shapely import wkb
 from six import string_types
 from structlog import get_logger
 
@@ -45,7 +48,15 @@ def create_geojson_point(lon: int | float, lat: int | float) -> dict[str, Any]:
     return {"type": "Feature", "geometry": point, "properties": {}}
 
 
-def create_geojson_from_bbox(bbox: list[float | int]) -> dict:
+def create_geometry_from_bbox(bbox: list[float | int]) -> dict:
+    """
+    Create a GeoJSON geometry from a bounding box.
+    Args:
+        bbox (list[float | int]): A list containing the bounding box coordinates in the format
+        [min_lon, min_lat, max_lon, max_lat].
+    Returns:
+        dict: A GeoJSON geometry object representing the bounding box.
+    """
     coordinates = [
         [bbox[0], bbox[1]],
         [bbox[2], bbox[1]],
@@ -53,11 +64,7 @@ def create_geojson_from_bbox(bbox: list[float | int]) -> dict:
         [bbox[0], bbox[3]],
         [bbox[0], bbox[1]],
     ]
-    polygon = {"type": "Polygon", "coordinates": [coordinates]}
-
-    feature = {"type": "Feature", "geometry": polygon, "properties": {}}
-    feature_collection = {"type": "FeatureCollection", "features": [feature]}
-    return feature_collection
+    return {"type": "Polygon", "coordinates": [coordinates]}
 
 
 def retrieveExtentFromWCS(
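
Reviewer note: create_geometry_from_bbox now returns a bare GeoJSON Polygon geometry instead of wrapping it in a FeatureCollection, which matches what pystac's Item(geometry=...) expects; the one call site that previously unwrapped the FeatureCollection (handle_raw_source) was updated accordingly. Expected output:

    bbox = [-180.0, -90.0, 180.0, 90.0]
    create_geometry_from_bbox(bbox)
    # {'type': 'Polygon', 'coordinates': [[[-180.0, -90.0], [180.0, -90.0],
    #   [180.0, 90.0], [-180.0, 90.0], [-180.0, -90.0]]]}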
@@ -242,45 +249,11 @@ def recursive_save(stac_object: Catalog, no_items: bool = False, geo_parquet: bo
     for child in stac_object.get_children():
         recursive_save(child, no_items, geo_parquet)
     if not no_items:
-        if geo_parquet:
-            create_geoparquet_items(stac_object)
-        else:
-            for item in stac_object.get_items():
-                item.save_object()
+        for item in stac_object.get_items():
+            item.save_object()
     stac_object.save_object()
 
 
-def create_geoparquet_items(stacObject: Catalog):
-    if iter_len_at_least(stacObject.get_items(), 1):
-        stac_dir_arr = stacObject.self_href.split("/")
-        stac_dir_arr.pop()
-        stac_dir_path = "/".join(stac_dir_arr)
-        items_stacgp_path = f"{stac_dir_path}/items.parquet"
-        to_stac_geoparquet(stacObject, items_stacgp_path)
-        gp_link = Link(
-            rel="items",
-            target=items_stacgp_path,
-            media_type="application/vnd.apache.parquet",
-            title="GeoParquet Items",
-        )
-        stacObject.clear_links(rel="item")
-        stacObject.add_links([gp_link])
-
-
-def to_stac_geoparquet(stacObject: Catalog, path: str):
-    items = []
-    for item in stacObject.get_items():
-        if not item.geometry:
-            item.geometry = create_geojson_point(0, 0)["geometry"]
-        if not item.assets:
-            item.assets = {"dummy_asset": Asset(href="")}
-        items.append(item.to_dict())
-    record_batch_reader = stacgp.arrow.parse_stac_items_to_arrow(items)
-    table = record_batch_reader.read_all()
-    os.makedirs(os.path.dirname(path), exist_ok=True)
-    stacgp.arrow.to_parquet(table, path)
-
-
 def iter_len_at_least(i, n: int) -> int:
     return sum(1 for _ in zip(range(n), i, strict=False)) == n
 
@@ -317,8 +290,12 @@ def generate_veda_cog_link(endpoint_config: dict, file_url: str | None) -> str:
     )
 
     file_url = f"url={file_url}&" if file_url else ""
-
-    target_url = f"https://openveda.cloud/api/raster/cog/tiles/WebMercatorQuad/{{z}}/{{x}}/{{y}}?{file_url}resampling_method=nearest{bidx}{colormap}{colormap_name}{rescale}{Nodata}"
+    target_url_base = endpoint_config["EndPoint"].replace("/stac/", "")
+    target_url = (
+        f"{target_url_base}/raster/cog/tiles/WebMercatorQuad/{{z}}/{{x}}/{{y}}?"
+        f"{file_url}resampling_method=nearest"
+        f"{bidx}{colormap}{colormap_name}{rescale}{Nodata}"
+    )
     return target_url
 
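Reviewer note: both VEDA URL builders (this one and generate_veda_tiles_link in endpoints.py) now derive the raster API base from the configured EndPoint instead of hardcoding https://openveda.cloud/api, e.g. (EndPoint value is illustrative):

    endpoint_config = {"EndPoint": "https://openveda.cloud/api/stac/"}  # assumed config value
    base = endpoint_config["EndPoint"].replace("/stac/", "")
    # base == "https://openveda.cloud/api"
    # -> "https://openveda.cloud/api/raster/cog/tiles/WebMercatorQuad/{z}/{x}/{y}?..."
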
@@ -439,6 +416,154 @@ def get_full_url(url: str, catalog_config) -> str:
     return f'{catalog_config["assets_endpoint"]}{url}'
 
 
+def update_extents_from_collection_children(collection: Collection):
+    # retrieve extents from children
+    c_bboxes = [
+        c_child.extent.spatial.bboxes[0]
+        for c_child in collection.get_children()
+        if isinstance(c_child, Collection)
+    ]
+    if len(c_bboxes) > 0:
+        merged_bbox = merge_bboxes(c_bboxes)
+    else:
+        LOGGER.warn(
+            "No bounding boxes found in children of collection, using default bbox",
+        )
+        merged_bbox = [-180.0, -90.0, 180.0, 90.0]
+
+    collection.extent.spatial.bboxes = [merged_bbox]
+    # Add bbox extents from children
+    for c_child in collection.get_children():
+        if isinstance(c_child, Collection) and merged_bbox != c_child.extent.spatial.bboxes[0]:
+            collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
+    # set time extent of collection
+    individual_datetimes = []
+    for c_child in collection.get_children():
+        if isinstance(c_child, Collection) and isinstance(
+            c_child.extent.temporal.intervals[0], list
+        ):
+            individual_datetimes.extend(c_child.extent.temporal.intervals[0])  # type: ignore
+    individual_datetimes = list(filter(lambda x: x is not None, individual_datetimes))
+    time_extent = [min(individual_datetimes), max(individual_datetimes)]
+    collection.extent.temporal = TemporalExtent([time_extent])
+
+
+def save_items(
+    collection: Collection,
+    items: list[Item],
+    output_path: str,
+    catalog_id: str,
+    colpath: str,
+    use_geoparquet: bool = False,
+) -> None:
+    """
+    Save a list of items for a collection either as single geoparquet or
+    by adding them to the collection in order to be saved by pystac as individual items.
+    Args:
+        collection (Collection): The collection to which the items will be added.
+        items (list[Item]): The list of items to save.
+        output_path (str): The path where the items will be saved.
+        catalog_id (str): The ID of the catalog to which the collection belongs.
+        colpath (str): The expected path where to save the files relative to the catalog root.
+        use_geoparquet (bool): If True, save items as a single GeoParquet file.
+            If False, add items to the collection and save them individually.
+    """
+    if len(items) == 0:
+        LOGGER.info(
+            "No items to save for collection, adding placeholder extents",
+            collection_id=collection.id,
+            item_count=len(items),
+        )
+        # we need to add some generic extent to the collection
+        collection.extent.spatial = SpatialExtent([[-180.0, -90.0, 180.0, 90.0]])
+        collection.extent.temporal = TemporalExtent(
+            [
+                datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytztimezone("UTC")),
+                datetime.now(tz=pytztimezone("UTC")),
+            ]
+        )
+        return
+    if use_geoparquet:
+        LOGGER.info(
+            "Saving items as GeoParquet file",
+            collection_id=collection.id,
+            item_count=len(items),
+        )
+        if colpath is None:
+            colpath = f"{collection.id}/{collection.id}"
+        buildcatpath = f"{output_path}/{catalog_id}"
+        record_batch_reader = stacgp.arrow.parse_stac_items_to_arrow(items)
+        table = record_batch_reader.read_all()
+        output_path = f"{buildcatpath}/{colpath}"
+        os.makedirs(output_path, exist_ok=True)
+        stacgp.arrow.to_parquet(table, f"{output_path}/items.parquet")
+        gp_link = Link(
+            rel="items",
+            target="./items.parquet",
+            media_type="application/vnd.apache.parquet",
+            title="GeoParquet Items",
+        )
+        collection.add_link(gp_link)
+        # add extent information to the collection
+        min_datetime = pc.min(table["datetime"]).as_py()
+        max_datetime = pc.max(table["datetime"]).as_py()
+        if not min_datetime:
+            # cases when datetime was null
+            # fallback to start_datetime
+            min_datetime = pc.min(table["start_datetime"]).as_py()
+            max_datetime = pc.max(table["start_datetime"]).as_py()
+        collection.extent.temporal = TemporalExtent([min_datetime, max_datetime])
+        geoms = [wkb.loads(g.as_py()) for g in table["geometry"] if g is not None]
+        bbox = sgeom.MultiPolygon(geoms).bounds
+        collection.extent.spatial = SpatialExtent([bbox])
+        # Make sure to also reference the geoparquet as asset
+        collection.add_asset(
+            "geoparquet",
+            Asset(
+                href="./items.parquet",
+                media_type="application/vnd.apache.parquet",
+                title="GeoParquet Items",
+                roles=["collection-mirror"],
+            ),
+        )
+    else:
+        # go over items and add them to the collection
+        LOGGER.info(
+            "Adding items to collection to be saved individually",
+            collection_id=collection.id,
+            item_count=len(items),
+        )
+        for item in items:
+            link = collection.add_item(item)
+            # bubble up information we want to the link
+            # it is possible for datetime to be null, if it is start and end datetime have to exist
+            item_datetime = item.get_datetime()
+            if item_datetime:
+                link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(item_datetime)
+            else:
+                link.extra_fields["start_datetime"] = format_datetime_to_isostring_zulu(
+                    parse_datestring_to_tz_aware_datetime(item.properties["start_datetime"])
+                )
+                link.extra_fields["end_datetime"] = format_datetime_to_isostring_zulu(
+                    parse_datestring_to_tz_aware_datetime(item.properties["end_datetime"])
+                )
+
+            # bubble up data assets based on role
+            collected_assets = [
+                asset.href
+                for asset in item.assets.values()
+                if asset.roles and ("data" in asset.roles or "default" in asset.roles)
+            ]
+            if collected_assets:
+                link.extra_fields["assets"] = collected_assets
+            # also bubble up item id and cog_href if available
+            # TODO: not clear when the item id is needed in the link might be some legacy reference
+            # link.extra_fields["item"] = item.id
+            if item.assets.get("cog_default"):
+                link.extra_fields["cog_href"] = item.assets["cog_default"].href
+        collection.update_extent_from_items()
 
+
 def read_config_file(path: str) -> dict:
     # If the given path exists directly, use it
     if os.path.exists(path):
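
Reviewer note: save_items is the new single choke point for item persistence. With use_geoparquet it writes one items.parquet per collection via stac-geoparquet, registers it both as an `items` link and as a `geoparquet` asset with role collection-mirror, and derives the collection extents from the Arrow table (pyarrow min/max on the datetime column, shapely bounds over the WKB geometries); otherwise it reproduces the pre-0.3 behavior of adding items individually and bubbling datetime/asset metadata onto the links. A minimal invocation sketch (IDs and paths are made up):

    from datetime import datetime, timezone
    from pystac import Asset, Collection, Extent, Item, SpatialExtent, TemporalExtent
    from eodash_catalog.utils import create_geometry_from_bbox, save_items

    collection = Collection(
        id="example-collection",
        description="demo",
        extent=Extent(
            SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]),
            TemporalExtent([[None, None]]),
        ),
    )
    item = Item(
        id="2024-01-01T00:00:00Z",
        geometry=create_geometry_from_bbox([-180.0, -90.0, 180.0, 90.0]),
        bbox=[-180.0, -90.0, 180.0, 90.0],
        datetime=datetime(2024, 1, 1, tzinfo=timezone.utc),
        properties={},
        assets={"dummy_asset": Asset(href="")},
    )
    # writes build/example-catalog/example-collection/items.parquet
    save_items(collection, [item], "build", "example-catalog",
               "example-collection", use_geoparquet=True)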
eodash_catalog-0.3.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eodash_catalog
-Version: 0.2.1
+Version: 0.3.1
 Summary: This package is intended to help create a compatible STAC catalog for the eodash dashboard client. It supports configuration of multiple endpoint types for information extraction.
 Project-URL: Documentation, https://github.com/eodash/eodash_catalog#readme
 Project-URL: Issues, https://github.com/eodash/eodash_catalog/issues
@@ -29,7 +29,7 @@ Requires-Dist: requests-oauthlib<1.3.2
 Requires-Dist: requests<3
 Requires-Dist: setuptools<71
 Requires-Dist: spdx-lookup<=0.3.3
-Requires-Dist: stac-geoparquet<=0.6.0
+Requires-Dist: stac-geoparquet<=0.7.0
 Requires-Dist: structlog<22.0
 Description-Content-Type: text/markdown
 
eodash_catalog-0.3.1.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+eodash_catalog/__about__.py,sha256=l-uZOyslIByuHXe6EqfhKvRxyfPaTvVIobrJDf86Ai8,137
+eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
+eodash_catalog/duration.py,sha256=TBG7v1lCpbYowADK5uJ2M8kPxsvQneFAFi1NIE26dy4,10754
+eodash_catalog/endpoints.py,sha256=Ckvg0MnaXQjTy7hd55kqnGlPmlz5UYGJ2c3HXdvLVNs,49168
+eodash_catalog/generate_indicators.py,sha256=FPeiZm9TE4PpbTyH6UMegQ7HwaARzO91IrLtzFjFSF0,21900
+eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
+eodash_catalog/stac_handling.py,sha256=waw8qRjwjdbDBRtialc4bG3WSjXAATklc-W5kLKywqE,25548
+eodash_catalog/thumbnails.py,sha256=oNbWdRC8KTLUC4PbSMlSaiOeLXfkIpa0j-sOZdn1RGU,2262
+eodash_catalog/utils.py,sha256=wEh0hvBVp20NKhdJMOQ4dmiiOrFUOTDP2sRtfe0FBpQ,22563
+eodash_catalog-0.3.1.dist-info/METADATA,sha256=OgbqxdohyQr9kO3WwjwJ3FRs5-5khvd6T7mmivVzpe8,3019
+eodash_catalog-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+eodash_catalog-0.3.1.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
+eodash_catalog-0.3.1.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
+eodash_catalog-0.3.1.dist-info/RECORD,,
eodash_catalog-0.2.1.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
-eodash_catalog/__about__.py,sha256=c99Cdt5zAgv8ECLNyPwczOXULCCEQldhtr8lFnXUAQ0,137
-eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
-eodash_catalog/duration.py,sha256=B6XOZfvNU7SuqpxuVtT1kNKODoOQJXDI6mocvA_U1ik,10816
-eodash_catalog/endpoints.py,sha256=3ejenyoRp3aT5YyPP_NcUy9nT1OWZydtya34qsQurk4,47521
-eodash_catalog/generate_indicators.py,sha256=8h5Xa9WZGXKsUZQbps38Jllt7KFvx7K7mZWzggtxzks,20785
-eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
-eodash_catalog/stac_handling.py,sha256=waw8qRjwjdbDBRtialc4bG3WSjXAATklc-W5kLKywqE,25548
-eodash_catalog/thumbnails.py,sha256=oNbWdRC8KTLUC4PbSMlSaiOeLXfkIpa0j-sOZdn1RGU,2262
-eodash_catalog/utils.py,sha256=BdFSiF0-M6NlnSKQGE0REI9rMPNmqqG4QjWqmaXN6g8,16946
-eodash_catalog-0.2.1.dist-info/METADATA,sha256=tif4YQQCZ5-69IteZQ9VIoOjmFW0__Y5DiphYhA_ZqI,3019
-eodash_catalog-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-eodash_catalog-0.2.1.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
-eodash_catalog-0.2.1.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
-eodash_catalog-0.2.1.dist-info/RECORD,,