eodash_catalog 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2024-present Daniel Santillan <daniel.santillan@eox.at>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.3.1"
4
+ __version__ = "0.3.3"
@@ -1,4 +1,5 @@
1
1
  import importlib
2
+ import io
2
3
  import json
3
4
  import os
4
5
  import sys
@@ -8,6 +9,7 @@ from datetime import datetime, timedelta
8
9
  from itertools import groupby
9
10
  from operator import itemgetter
10
11
 
12
+ import pyarrow.parquet as pq
11
13
  import requests
12
14
  from pystac import Asset, Catalog, Collection, Item, Link, SpatialExtent, Summaries, TemporalExtent
13
15
  from pystac_client import Client
@@ -28,6 +30,7 @@ from eodash_catalog.thumbnails import generate_thumbnail
28
30
  from eodash_catalog.utils import (
29
31
  Options,
30
32
  create_geometry_from_bbox,
33
+ extract_extent_from_geoparquet,
31
34
  filter_time_entries,
32
35
  format_datetime_to_isostring_zulu,
33
36
  generate_veda_cog_link,
@@ -606,19 +609,31 @@ def handle_GeoDB_endpoint(
606
609
  items = []
607
610
  for v in values:
608
611
  # add items based on inputData fields for each time step available in values
609
- first_match = next(
612
+ first_match: dict = next(
610
613
  (item for item in input_data if item.get("Identifier") == v["input_data"]), None
611
614
  )
612
615
  time_object = datetime.fromisoformat(v["time"])
616
+ if endpoint_config.get("MapReplaceDates"):
617
+ # get mapping of AOI_ID to list of dates
618
+ available_dates_for_aoi_id = endpoint_config.get("MapReplaceDates").get(
619
+ v["aoi_id"]
620
+ )
621
+ if available_dates_for_aoi_id:
622
+ formatted_datetime = time_object.strftime("%Y-%m-%d")
623
+ if formatted_datetime not in available_dates_for_aoi_id:
624
+ # discard this date because not in available map dates
625
+ continue
613
626
  # extract wkt geometry from sub_aoi
614
627
  if "sub_aoi" in v and v["sub_aoi"] != "/":
615
628
  # create geometry from wkt
616
- geometry = mapping(wkt.loads(v["sub_aoi"]))
629
+ shapely_geometry = wkt.loads(v["sub_aoi"])
630
+ geometry = mapping(shapely_geometry)
617
631
  # converting multipolygon to polygon to avoid shapely throwing an exception
618
632
  # in collection extent from geoparquet table generation
619
633
  # while trying to create a multipolygon extent of all multipolygons
620
634
  if geometry["type"] == "MultiPolygon":
621
635
  geometry = {"type": "Polygon", "coordinates": geometry["coordinates"][0]}
636
+ bbox = shapely_geometry.bounds
622
637
  else:
623
638
  geometry = create_geometry_from_bbox(bbox)
624
639
  item = Item(
@@ -637,7 +652,7 @@ def handle_GeoDB_endpoint(
637
652
  "wms:layers": [first_match["Layers"]],
638
653
  "role": ["data"],
639
654
  }
640
- if url.startswith("https://services.sentinel-hub.com/ogc/wms/"):
655
+ if "sentinel-hub.com" in url:
641
656
  instanceId = os.getenv("SH_INSTANCE_ID")
642
657
  if "InstanceId" in endpoint_config:
643
658
  instanceId = endpoint_config["InstanceId"]
@@ -652,7 +667,7 @@ def handle_GeoDB_endpoint(
652
667
  {"wms:dimensions": {"TIME": f"{start_date}/{end_date}"}}
653
668
  )
654
669
  # we add the instance id to the url
655
- url = f"https://services.sentinel-hub.com/ogc/wms/{instanceId}"
670
+ url = f"{url}{instanceId}"
656
671
  else:
657
672
  extra_fields.update({"wms:dimensions": {"TIME": v["time"]}})
658
673
  link = Link(
@@ -664,6 +679,34 @@ def handle_GeoDB_endpoint(
664
679
  )
665
680
  item.add_link(link)
666
681
  items.append(item)
682
+ case "XYZ":
683
+ # handler for NASA apis
684
+ url = first_match["Url"]
685
+ extra_fields = {}
686
+ # replace time to a formatted version
687
+ date_formatted = time_object.strftime(
688
+ first_match.get("DateFormat", "%Y_%m_%d")
689
+ )
690
+ target_url = url.replace("{time}", date_formatted)
691
+ if SiteMapping := first_match.get("SiteMapping"):
692
+ # match with aoi_id
693
+ site = SiteMapping.get(v["aoi_id"])
694
+ # replace in URL
695
+ if site:
696
+ target_url = target_url.replace("{site}", site)
697
+ else:
698
+ LOGGER.info(
699
+ f"Warning: no match for SiteMapping in config for {site}"
700
+ )
701
+ link = Link(
702
+ rel="xyz",
703
+ target=target_url,
704
+ media_type="image/png",
705
+ title=collection_config["Name"],
706
+ extra_fields=extra_fields,
707
+ )
708
+ item.add_link(link)
709
+ items.append(item)
667
710
  save_items(
668
711
  locations_collection,
669
712
  items,
@@ -687,6 +730,7 @@ def handle_GeoDB_endpoint(
687
730
  link.extra_fields["latlng"] = latlon
688
731
  link.extra_fields["country"] = country
689
732
  link.extra_fields["name"] = city
733
+ add_collection_information(catalog_config, locations_collection, collection_config)
690
734
 
691
735
  if "yAxis" not in collection_config:
692
736
  # fetch yAxis and store it to data, preventing need to save it per dataset in yml
@@ -778,19 +822,24 @@ def handle_WMS_endpoint(
778
822
  # Create an item per time to allow visualization in stac clients
779
823
  if len(datetimes) > 0:
780
824
  for dt in datetimes:
825
+ # case of wms interval coming from config
826
+ dt_item = dt[0] if isinstance(dt, list) else dt
781
827
  item = Item(
782
- id=format_datetime_to_isostring_zulu(dt),
828
+ id=format_datetime_to_isostring_zulu(dt_item),
783
829
  bbox=spatial_extent,
784
830
  properties={},
785
831
  geometry=create_geometry_from_bbox(spatial_extent),
786
- datetime=dt,
832
+ datetime=dt_item,
787
833
  stac_extensions=[
788
834
  "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
789
835
  ],
790
836
  assets={"dummy_asset": Asset(href="")},
791
837
  )
792
838
  add_projection_info(endpoint_config, item)
793
- add_visualization_info(item, collection_config, endpoint_config, datetimes=[dt])
839
+ dt_visualization = dt if isinstance(dt, list) else [dt]
840
+ add_visualization_info(
841
+ item, collection_config, endpoint_config, datetimes=dt_visualization
842
+ )
794
843
  items.append(item)
795
844
  else:
796
845
  LOGGER.warn(f"NO datetimes returned for collection: {collection_config['Name']}!")
@@ -875,7 +924,6 @@ def add_visualization_info(
875
924
  start_isostring = format_datetime_to_isostring_zulu(dt)
876
925
  # SH WMS for public collections needs time interval, we use full day here
877
926
  end = dt + timedelta(days=1) - timedelta(milliseconds=1)
878
- # we have start_datetime and end_datetime
879
927
  if len(datetimes) == 2:
880
928
  end = datetimes[1]
881
929
  end_isostring = format_datetime_to_isostring_zulu(end)
@@ -913,7 +961,13 @@ def add_visualization_info(
913
961
  )
914
962
  dimensions[key] = value
915
963
  if datetimes is not None:
916
- dimensions["TIME"] = format_datetime_to_isostring_zulu(datetimes[0])
964
+ if len(datetimes) > 1:
965
+ start = format_datetime_to_isostring_zulu(datetimes[0])
966
+ end = format_datetime_to_isostring_zulu(datetimes[1])
967
+ interval = f"{start}/{end}"
968
+ dimensions["TIME"] = interval
969
+ else:
970
+ dimensions["TIME"] = format_datetime_to_isostring_zulu(datetimes[0])
917
971
  if dimensions != {}:
918
972
  extra_fields["wms:dimensions"] = dimensions
919
973
  if endpoint_config.get("Styles"):
@@ -1218,6 +1272,33 @@ def handle_raw_source(
1218
1272
  # eodash v4 compatibility, adding last referenced style to collection
1219
1273
  if style_link:
1220
1274
  collection.add_link(style_link)
1275
+ elif endpoint_config.get("ParquetSource"):
1276
+ # if parquet source is provided, download it and create items from it
1277
+ parquet_source = endpoint_config["ParquetSource"]
1278
+ if parquet_source.startswith("http"):
1279
+ # download parquet file
1280
+ parquet_file = requests.get(parquet_source)
1281
+ if parquet_file.status_code != 200:
1282
+ LOGGER.error(f"Failed to download parquet file from {parquet_source}")
1283
+ return collection
1284
+ try:
1285
+ table = pq.read_table(io.BytesIO(parquet_file.content))
1286
+ except Exception as e:
1287
+ LOGGER.error(f"Failed to read parquet file: {e}")
1288
+ return collection
1289
+ extents = extract_extent_from_geoparquet(table)
1290
+ collection.extent.temporal = extents[0]
1291
+ collection.extent.spatial = extents[1]
1292
+ collection.add_asset(
1293
+ "geoparquet",
1294
+ Asset(
1295
+ href=parquet_source,
1296
+ media_type="application/vnd.apache.parquet",
1297
+ title="GeoParquet Items",
1298
+ roles=["collection-mirror"],
1299
+ ),
1300
+ )
1301
+
1221
1302
  else:
1222
1303
  LOGGER.warn(f"NO datetimes configured for collection: {collection_config['Name']}!")
1223
1304
 
@@ -18,6 +18,7 @@ from structlog import get_logger
18
18
  from eodash_catalog.utils import (
19
19
  generateDatetimesFromInterval,
20
20
  get_full_url,
21
+ make_intervals,
21
22
  parse_datestring_to_tz_aware_datetime,
22
23
  read_config_file,
23
24
  )
@@ -535,16 +536,22 @@ def add_extra_fields(
535
536
  def get_collection_datetimes_from_config(endpoint_config: dict) -> list[datetime]:
536
537
  times_datetimes: list[datetime] = []
537
538
  if endpoint_config:
539
+ interval_between_dates = endpoint_config.get("WMSIntervalsBetweenDates")
538
540
  if endpoint_config.get("Times"):
539
541
  times = list(endpoint_config.get("Times", []))
540
542
  times_datetimes = sorted(
541
543
  [parse_datestring_to_tz_aware_datetime(time) for time in times]
542
544
  )
545
+ if interval_between_dates:
546
+ # convert to list of datetime_start and datetime_end
547
+ times_datetimes = make_intervals(times_datetimes)
543
548
  elif endpoint_config.get("DateTimeInterval"):
544
549
  start = endpoint_config["DateTimeInterval"].get("Start", "2020-09-01T00:00:00Z")
545
550
  end = endpoint_config["DateTimeInterval"].get("End", "2020-10-01T00:00:00Z")
546
551
  timedelta_config = endpoint_config["DateTimeInterval"].get("Timedelta", {"days": 1})
547
- times_datetimes = generateDatetimesFromInterval(start, end, timedelta_config)
552
+ times_datetimes = generateDatetimesFromInterval(
553
+ start, end, timedelta_config, interval_between_dates
554
+ )
548
555
  return times_datetimes
549
556
 
550
557
 
eodash_catalog/utils.py CHANGED
@@ -18,7 +18,7 @@ from dateutil import parser
18
18
  from owslib.wcs import WebCoverageService
19
19
  from owslib.wms import WebMapService
20
20
  from owslib.wmts import WebMapTileService
21
- from pystac import Asset, Catalog, Collection, Item, Link, RelType, SpatialExtent, TemporalExtent
21
+ from pystac import Asset, Catalog, Collection, Item, RelType, SpatialExtent, TemporalExtent
22
22
  from pytz import timezone as pytztimezone
23
23
  from shapely import geometry as sgeom
24
24
  from shapely import wkb
@@ -214,7 +214,7 @@ def parse_duration(datestring):
214
214
 
215
215
 
216
216
  def generateDatetimesFromInterval(
217
- start: str, end: str, timedelta_config: dict | None = None
217
+ start: str, end: str, timedelta_config: dict | None = None, interval_between_dates: bool = False
218
218
  ) -> list[datetime]:
219
219
  if timedelta_config is None:
220
220
  timedelta_config = {}
@@ -226,7 +226,10 @@ def generateDatetimesFromInterval(
226
226
  delta = timedelta(**timedelta_config)
227
227
  dates = []
228
228
  while start_dt <= end_dt:
229
- dates.append(start_dt)
229
+ if interval_between_dates:
230
+ dates.append([start_dt, start_dt + delta - timedelta(seconds=1)])
231
+ else:
232
+ dates.append(start_dt)
230
233
  start_dt += delta
231
234
  return dates
232
235
 
@@ -444,8 +447,32 @@ def update_extents_from_collection_children(collection: Collection):
444
447
  ):
445
448
  individual_datetimes.extend(c_child.extent.temporal.intervals[0]) # type: ignore
446
449
  individual_datetimes = list(filter(lambda x: x is not None, individual_datetimes))
447
- time_extent = [min(individual_datetimes), max(individual_datetimes)]
448
- collection.extent.temporal = TemporalExtent([time_extent])
450
+ if individual_datetimes:
451
+ time_extent = [min(individual_datetimes), max(individual_datetimes)]
452
+ collection.extent.temporal = TemporalExtent([time_extent])
453
+
454
+
455
def extract_extent_from_geoparquet(table) -> tuple[TemporalExtent, SpatialExtent]:
    """
    Extract temporal and spatial extents from a GeoParquet table.
    Args:
        table (pyarrow.Table): The table containing the GeoParquet data.
            The "datetime" and "geometry" columns are read; "start_datetime"
            is only read when no "datetime" value is available. Geometries
            are decoded as WKB.
    Returns:
        tuple: (temporal extent, spatial extent), in that order.
    """
    min_datetime = pc.min(table["datetime"]).as_py()
    max_datetime = pc.max(table["datetime"]).as_py()
    if min_datetime is None:
        # cases when datetime was null
        # fallback to start_datetime
        min_datetime = pc.min(table["start_datetime"]).as_py()
        max_datetime = pc.max(table["start_datetime"]).as_py()
    # hand over one explicit [start, end] interval instead of a flat list, so
    # the result does not depend on pystac wrapping a flat list for us
    temporal = TemporalExtent([[min_datetime, max_datetime]])
    # iterating a pyarrow column yields scalar objects, never plain None, so
    # null geometries have to be filtered through the scalar validity flag
    geoms = [wkb.loads(g.as_py()) for g in table["geometry"] if g.is_valid]
    # a GeometryCollection accepts any mix of geometry types, whereas a
    # MultiPolygon can only be built from polygons
    bbox = sgeom.GeometryCollection(geoms).bounds
    spatial = SpatialExtent([bbox])
    return temporal, spatial
449
476
 
450
477
 
451
478
  def save_items(
@@ -497,25 +524,9 @@ def save_items(
497
524
  output_path = f"{buildcatpath}/{colpath}"
498
525
  os.makedirs(output_path, exist_ok=True)
499
526
  stacgp.arrow.to_parquet(table, f"{output_path}/items.parquet")
500
- gp_link = Link(
501
- rel="items",
502
- target="./items.parquet",
503
- media_type="application/vnd.apache.parquet",
504
- title="GeoParquet Items",
505
- )
506
- collection.add_link(gp_link)
507
- # add extent information to the collection
508
- min_datetime = pc.min(table["datetime"]).as_py()
509
- max_datetime = pc.max(table["datetime"]).as_py()
510
- if not min_datetime:
511
- # cases when datetime was null
512
- # fallback to start_datetime
513
- min_datetime = pc.min(table["start_datetime"]).as_py()
514
- max_datetime = pc.max(table["start_datetime"]).as_py()
515
- collection.extent.temporal = TemporalExtent([min_datetime, max_datetime])
516
- geoms = [wkb.loads(g.as_py()) for g in table["geometry"] if g is not None]
517
- bbox = sgeom.MultiPolygon(geoms).bounds
518
- collection.extent.spatial = SpatialExtent([bbox])
527
+ extents = extract_extent_from_geoparquet(table)
528
+ collection.extent.temporal = extents[0]
529
+ collection.extent.spatial = extents[1]
519
530
  # Make sure to also reference the geoparquet as asset
520
531
  collection.add_asset(
521
532
  "geoparquet",
@@ -608,3 +619,26 @@ def merge_bboxes(bboxes: list[list[float]]) -> list[float]:
608
619
  max_lat = max(b[3] for b in bboxes)
609
620
 
610
621
  return [min_lon, min_lat, max_lon, max_lat]
622
+
623
+
624
def make_intervals(datetimes: list[datetime]) -> list[list[datetime]]:
    """
    Converts a list of datetimes into a list of [start, end] pairs.

    For every element but the last, end is the next element of the original
    list minus 1 second. The last element has no successor, so its end is its
    start plus the distance between the last two elements; with a single
    element start and end are identical. An empty input gives an empty list.
    """
    if not datetimes:
        return []
    one_second = timedelta(seconds=1)
    # pair every datetime with its successor; end is successor minus one second
    intervals = [
        [start, following - one_second]
        for start, following in zip(datetimes, datetimes[1:])
    ]
    # last item: use previous interval length added to last start
    last_start = datetimes[-1]
    if len(datetimes) > 1:
        prev_interval = last_start - datetimes[-2]
    else:
        prev_interval = timedelta(seconds=0)
    intervals.append([last_start, last_start + prev_interval])
    return intervals
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eodash_catalog
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: This package is intended to help create a compatible STAC catalog for the eodash dashboard client. It supports configuration of multiple endpoint types for information extraction.
5
5
  Project-URL: Documentation, https://github.com/eodash/eodash_catalog#readme
6
6
  Project-URL: Issues, https://github.com/eodash/eodash_catalog/issues
@@ -0,0 +1,14 @@
1
+ eodash_catalog/__about__.py,sha256=jakR3FNEOvU2GrvtecX6moFoNPby5Ok50p6_6JcZoEQ,137
2
+ eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
3
+ eodash_catalog/duration.py,sha256=TBG7v1lCpbYowADK5uJ2M8kPxsvQneFAFi1NIE26dy4,10754
4
+ eodash_catalog/endpoints.py,sha256=uI7aapRHQ1eDYQYKlrMdLHi_6aitBS3jutrou2HDWJY,53183
5
+ eodash_catalog/generate_indicators.py,sha256=FPeiZm9TE4PpbTyH6UMegQ7HwaARzO91IrLtzFjFSF0,21900
6
+ eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
7
+ eodash_catalog/stac_handling.py,sha256=xMhUK_gvijs6tL-1ecg28DtWBo4msd5NTYZpYtt7FHo,25877
8
+ eodash_catalog/thumbnails.py,sha256=oNbWdRC8KTLUC4PbSMlSaiOeLXfkIpa0j-sOZdn1RGU,2262
9
+ eodash_catalog/utils.py,sha256=C-HQK6IYMNAd3Vfgq9KOqsBlJu-jtnZTcsHGp4kj7Y0,23773
10
+ eodash_catalog-0.3.3.dist-info/METADATA,sha256=H1Npnw-SZ3hAerwBs6ml0fscgSP9MkMDbVTryhPq7kM,3019
11
+ eodash_catalog-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ eodash_catalog-0.3.3.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
13
+ eodash_catalog-0.3.3.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
14
+ eodash_catalog-0.3.3.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- eodash_catalog/__about__.py,sha256=l-uZOyslIByuHXe6EqfhKvRxyfPaTvVIobrJDf86Ai8,137
2
- eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
3
- eodash_catalog/duration.py,sha256=TBG7v1lCpbYowADK5uJ2M8kPxsvQneFAFi1NIE26dy4,10754
4
- eodash_catalog/endpoints.py,sha256=Ckvg0MnaXQjTy7hd55kqnGlPmlz5UYGJ2c3HXdvLVNs,49168
5
- eodash_catalog/generate_indicators.py,sha256=FPeiZm9TE4PpbTyH6UMegQ7HwaARzO91IrLtzFjFSF0,21900
6
- eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
7
- eodash_catalog/stac_handling.py,sha256=waw8qRjwjdbDBRtialc4bG3WSjXAATklc-W5kLKywqE,25548
8
- eodash_catalog/thumbnails.py,sha256=oNbWdRC8KTLUC4PbSMlSaiOeLXfkIpa0j-sOZdn1RGU,2262
9
- eodash_catalog/utils.py,sha256=wEh0hvBVp20NKhdJMOQ4dmiiOrFUOTDP2sRtfe0FBpQ,22563
10
- eodash_catalog-0.3.1.dist-info/METADATA,sha256=OgbqxdohyQr9kO3WwjwJ3FRs5-5khvd6T7mmivVzpe8,3019
11
- eodash_catalog-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- eodash_catalog-0.3.1.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
13
- eodash_catalog-0.3.1.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
14
- eodash_catalog-0.3.1.dist-info/RECORD,,