eodash_catalog 0.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release: this version of eodash_catalog might be problematic.

@@ -0,0 +1,1384 @@
+ #!/usr/bin/python
+ """
+ Indicator generator to harvest information from endpoints and generate catalog
+ """
+ import time
+ import requests
+ import json
+ from pystac_client import Client
+ from dataclasses import dataclass
+ from typing import List
+ import os
+ import re
+ from pathlib import Path
+ from datetime import datetime, timedelta
+ from dotenv import load_dotenv
+ import yaml
+ from yaml.loader import SafeLoader
+ from itertools import groupby
+ from operator import itemgetter
+ from dateutil import parser
+ from eodash_catalog.sh_endpoint import get_SH_token
+ from eodash_catalog.utils import (
+     create_geojson_point,
+     retrieveExtentFromWMSWMTS,
+     generateDateIsostringsFromInterval,
+     RaisingThread,
+ )
+ from pystac import (
+     Item,
+     Asset,
+     Catalog,
+     Link,
+     CatalogType,
+     Collection,
+     Extent,
+     SpatialExtent,
+     TemporalExtent,
+     Summaries,
+     Provider,
+ )
+ from pystac.layout import TemplateLayoutStrategy
+ from pystac.validation import validate_all
+ import spdx_lookup as lookup
+ 
+ import click
+ 
+ 
+ # make sure we are loading the env local definition
+ load_dotenv()
+ 
+ 
+ def recursive_save(stac_object, no_items=False):
+     stac_object.save_object()
+     for child in stac_object.get_children():
+         recursive_save(child, no_items)
+     if not no_items:
+         # try to save items if available
+         for item in stac_object.get_items():
+             item.save_object()
+ 
+ 
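+ # Illustrative catalog configuration consumed by process_catalog_file below.
+ # The keys are inferred from the accesses in this module; the values are made up:
+ #
+ #   id: my_catalog
+ #   title: My Dashboard Catalog
+ #   description: Catalog harvested from configured endpoints
+ #   endpoint: https://example.com/catalog
+ #   assets_endpoint: https://example.com/assets
+ #   collections:
+ #     - collection_a
+ #     - collection_b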
+ def process_catalog_file(file_path, options):
+     print("Processing catalog:", file_path)
+     with open(file_path) as f:
+         config = yaml.load(f, Loader=SafeLoader)
+ 
+         if len(options.collections) == 0 or "all" in options.collections:
+             # create full catalog
+             process_collections = config["collections"]
+         else:
+             # create only catalogs containing the passed collections
+             process_collections = [
+                 c for c in config["collections"] if c in options.collections
+             ]
+         if len(process_collections) == 0:
+             print("No applicable collections found for catalog, skipping creation")
+             return
+         catalog = Catalog(
+             id=config["id"],
+             description=config["description"],
+             title=config["title"],
+             catalog_type=CatalogType.RELATIVE_PUBLISHED,
+         )
+         for collection in process_collections:
+             process_collection_file(
+                 config,
+                 "%s/%s.yaml" % (options.collectionsspath, collection),
+                 catalog,
+                 options,
+             )
+ 
+         strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
+         catalog.normalize_hrefs(
+             "%s/%s" % (options.outputpath, config["id"]), strategy=strategy
+         )
+ 
+         print("Started creation of collection files")
+         start = time.time()
+         if options.ni:
+             recursive_save(catalog, options.ni)
+         else:
+             # For a full catalog save with items this still seems to be faster
+             catalog.save(dest_href="%s/%s" % (options.outputpath, config["id"]))
+         end = time.time()
+         print(f"Catalog {config['id']}: Time consumed in saving: {end - start}")
+ 
+         if options.vd:
+             # try to validate catalog if flag was set
+             print("Running validation of catalog %s" % file_path)
+             try:
+                 validate_all(catalog.to_dict(), href=config["endpoint"])
+             except Exception as e:
+                 print("Issue validating catalog: %s" % e)
+ 
+ 
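+ # Illustrative collection configuration consumed by process_collection_file
+ # below (keys inferred from this module; values are made up):
+ #
+ #   Name: my_collection
+ #   Title: My Collection
+ #   EodashIdentifier: MY1
+ #   Subtitle: Short subtitle
+ #   Resources:
+ #     - Name: WMS
+ #       EndPoint: https://example.com/wms
+ #       LayerId: my_layer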
+ def process_collection_file(config, file_path, catalog, options):
+     print("Processing collection:", file_path)
+     with open(file_path) as f:
+         data = yaml.load(f, Loader=SafeLoader)
+         if "Resources" in data:
+             for resource in data["Resources"]:
+                 if "EndPoint" in resource:
+                     if resource["Name"] == "Sentinel Hub":
+                         handle_SH_endpoint(config, resource, data, catalog, options)
+                     elif resource["Name"] == "Sentinel Hub WMS":
+                         collection = handle_SH_WMS_endpoint(
+                             config, resource, data, catalog
+                         )
+                     elif resource["Name"] == "GeoDB":
+                         collection = handle_GeoDB_endpoint(
+                             config, resource, data, catalog
+                         )
+                         add_to_catalog(collection, catalog, resource, data)
+                     elif resource["Name"] == "VEDA":
+                         handle_VEDA_endpoint(config, resource, data, catalog, options)
+                     elif resource["Name"] == "marinedatastore":
+                         handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
+                     elif resource["Name"] == "xcube":
+                         handle_xcube_endpoint(config, resource, data, catalog)
+                     elif resource["Name"] == "WMS":
+                         handle_WMS_endpoint(config, resource, data, catalog)
+                     elif resource["Name"] == "GeoDB Vector Tiles":
+                         # NOTE: handle_GeoDB_Tiles_endpoint is referenced here
+                         # but not defined in this file
+                         handle_GeoDB_Tiles_endpoint(config, resource, data, catalog)
+                     elif resource["Name"] == "Collection-only":
+                         handle_collection_only(config, resource, data, catalog)
+                     else:
+                         raise ValueError("Type of Resource is not supported")
+         elif "Subcollections" in data:
+             # if no endpoint is specified we check for definition of subcollections
+             parent_collection, _ = get_or_create_collection(
+                 catalog, data["Name"], data, config
+             )
+ 
+             locations = []
+             countries = []
+             for sub_coll_def in data["Subcollections"]:
+                 # Subcollection has only data on one location which is defined
+                 # for the entire collection
+                 if "Name" in sub_coll_def and "Point" in sub_coll_def:
+                     locations.append(sub_coll_def["Name"])
+                     if isinstance(sub_coll_def["Country"], list):
+                         countries.extend(sub_coll_def["Country"])
+                     else:
+                         countries.append(sub_coll_def["Country"])
+                     process_collection_file(
+                         config,
+                         "../collections/%s.yaml" % (sub_coll_def["Collection"]),
+                         parent_collection,
+                         options,
+                     )
+                     # find link in parent collection to update metadata
+                     for link in parent_collection.links:
+                         if (
+                             link.rel == "child"
+                             and "id" in link.extra_fields
+                             and link.extra_fields["id"] == sub_coll_def["Identifier"]
+                         ):
+                             latlng = "%s,%s" % (
+                                 sub_coll_def["Point"][1],
+                                 sub_coll_def["Point"][0],
+                             )
+                             link.extra_fields["id"] = sub_coll_def["Identifier"]
+                             link.extra_fields["latlng"] = latlng
+                             link.extra_fields["name"] = sub_coll_def["Name"]
+                     # Update title of collection to use location name
+                     sub_collection = parent_collection.get_child(
+                         id=sub_coll_def["Identifier"]
+                     )
+                     if sub_collection:
+                         sub_collection.title = sub_coll_def["Name"]
+                 # The subcollection has multiple locations which need to be
+                 # extracted and elevated to parent collection level
+                 else:
+                     # create temp catalog to save collection
+                     tmp_catalog = Catalog(
+                         id="tmp_catalog", description="temp catalog placeholder"
+                     )
+                     process_collection_file(
+                         config,
+                         "../collections/%s.yaml" % (sub_coll_def["Collection"]),
+                         tmp_catalog,
+                         options,
+                     )
+                     links = tmp_catalog.get_child(
+                         sub_coll_def["Identifier"]
+                     ).get_links()
+                     for link in links:
+                         # extract summary information
+                         if "city" in link.extra_fields:
+                             locations.append(link.extra_fields["city"])
+                         if "country" in link.extra_fields:
+                             if isinstance(link.extra_fields["country"], list):
+                                 countries.extend(link.extra_fields["country"])
+                             else:
+                                 countries.append(link.extra_fields["country"])
+ 
+                     parent_collection.add_links(links)
+ 
+             add_collection_information(config, parent_collection, data)
+             parent_collection.update_extent_from_items()
+             # Add bbox extents from children
+             for c_child in parent_collection.get_children():
+                 parent_collection.extent.spatial.bboxes.append(
+                     c_child.extent.spatial.bboxes[0]
+                 )
+             # Fill summaries for locations
+             parent_collection.summaries = Summaries(
+                 {
+                     "cities": list(set(locations)),
+                     "countries": list(set(countries)),
+                 }
+             )
+             add_to_catalog(parent_collection, catalog, None, data)
+ 
+ 
+ def handle_collection_only(config, endpoint, data, catalog):
+     collection, times = get_or_create_collection(
+         catalog, data["Name"], data, config, endpoint
+     )
+     if len(times) > 0 and not endpoint.get("Disable_Items"):
+         for t in times:
+             item = Item(
+                 id=t,
+                 bbox=endpoint.get("OverwriteBBox"),
+                 properties={},
+                 geometry=None,
+                 datetime=parser.isoparse(t),
+             )
+             link = collection.add_item(item)
+             link.extra_fields["datetime"] = t
+     add_collection_information(config, collection, data)
+     add_to_catalog(collection, catalog, None, data)
+ 
+ 
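+ # handle_WMS_endpoint derives the layer's spatial extent and available times
+ # from the WMS/WMTS service capabilities (retrieveExtentFromWMSWMTS) unless the
+ # endpoint configuration overwrites them, then creates one STAC Item per
+ # timestamp so clients can visualize individual time steps.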
+ def handle_WMS_endpoint(config, endpoint, data, catalog, wmts=False):
+     collection, times = get_or_create_collection(
+         catalog, data["Name"], data, config, endpoint
+     )
+     spatial_extent = collection.extent.spatial.to_dict().get(
+         "bbox", [-180, -90, 180, 90]
+     )[0]
+     if endpoint.get("Type") != "OverwriteTimes" or not endpoint.get("OverwriteBBox"):
+         # some endpoints allow "narrowed-down" capabilities per-layer, which we
+         # utilize to not have to process full service capabilities XML
+         capabilities_url = endpoint["EndPoint"]
+         spatial_extent, times = retrieveExtentFromWMSWMTS(
+             capabilities_url, endpoint["LayerId"], wmts=wmts
+         )
+     # Create an item per time to allow visualization in stac clients
+     if len(times) > 0 and not endpoint.get("Disable_Items"):
+         for t in times:
+             item = Item(
+                 id=t,
+                 bbox=spatial_extent,
+                 properties={},
+                 geometry=None,
+                 datetime=parser.isoparse(t),
+             )
+             add_visualization_info(item, data, endpoint, time=t)
+             link = collection.add_item(item)
+             link.extra_fields["datetime"] = t
+         collection.update_extent_from_items()
+ 
+     # Check if we should overwrite bbox
+     if "OverwriteBBox" in endpoint:
+         collection.extent.spatial = SpatialExtent(
+             [
+                 endpoint["OverwriteBBox"],
+             ]
+         )
+ 
+     add_visualization_info(collection, data, endpoint)
+     add_collection_information(config, collection, data)
+     add_to_catalog(collection, catalog, endpoint, data)
+ 
+ 
+ def handle_SH_endpoint(config, endpoint, data, catalog, options=None):
+     token = get_SH_token()
+     headers = {"Authorization": "Bearer %s" % token}
+     endpoint["EndPoint"] = "https://services.sentinel-hub.com/api/v1/catalog/1.0.0/"
+     # Overwrite collection id with type, such as ZARR or BYOC
+     if "Type" in endpoint:
+         endpoint["CollectionId"] = endpoint["Type"] + "-" + endpoint["CollectionId"]
+     handle_STAC_based_endpoint(config, endpoint, data, catalog, headers, options)
+ 
+ 
+ def handle_SH_WMS_endpoint(config, endpoint, data, catalog):
+     # create collection and subcollections (based on locations)
+     if "Locations" in data:
+         root_collection, _ = get_or_create_collection(
+             catalog, data["Name"], data, config, endpoint
+         )
+         for location in data["Locations"]:
+             # create and populate location collections based on times
+             # TODO: Should we add some new description per location?
+             location_config = {
+                 "Title": location["Name"],
+                 "Description": "",
+             }
+             collection, _ = get_or_create_collection(
+                 catalog, location["Identifier"], location_config, config, endpoint
+             )
+             collection.extra_fields["endpointtype"] = endpoint["Name"]
+             for time in location["Times"]:
+                 item = Item(
+                     id=time,
+                     bbox=location["Bbox"],
+                     properties={},
+                     geometry=None,
+                     datetime=parser.isoparse(time),
+                 )
+                 add_visualization_info(item, data, endpoint, time=time)
+                 item_link = collection.add_item(item)
+                 item_link.extra_fields["datetime"] = time
+ 
+             link = root_collection.add_child(collection)
+             # bubble up information we want to the link
+             latlng = "%s,%s" % (location["Point"][1], location["Point"][0])
+             link.extra_fields["id"] = location["Identifier"]
+             link.extra_fields["latlng"] = latlng
+             link.extra_fields["country"] = location["Country"]
+             link.extra_fields["city"] = location["Name"]
+             collection.update_extent_from_items()
+             add_visualization_info(collection, data, endpoint)
+ 
+         root_collection.update_extent_from_items()
+         # Add bbox extents from children
+         for c_child in root_collection.get_children():
+             root_collection.extent.spatial.bboxes.append(
+                 c_child.extent.spatial.bboxes[0]
+             )
+         add_to_catalog(root_collection, catalog, endpoint, data)
+         return root_collection
+ 
+ 
+ def handle_VEDA_endpoint(config, endpoint, data, catalog, options=None):
+     handle_STAC_based_endpoint(config, endpoint, data, catalog, options=options)
+ 
+ 
+ def handle_xcube_endpoint(config, endpoint, data, catalog):
+     root_collection = process_STAC_Datacube_Endpoint(
+         config=config,
+         endpoint=endpoint,
+         data=data,
+         catalog=catalog,
+     )
+ 
+     add_example_info(root_collection, data, endpoint, config)
+     add_to_catalog(root_collection, catalog, endpoint, data)
+ 
+ 
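+ # Illustrative endpoint snippet for the "OverwriteTimes" handling in
+ # get_or_create_collection below; keys are inferred from this module and the
+ # values are made up. Alternatively an explicit "Times" list can be given.
+ #
+ #   Type: OverwriteTimes
+ #   OverwriteBBox: [-180, -90, 180, 90]
+ #   DateTimeInterval:
+ #     Start: "2020-09-01T00:00:00"
+ #     End: "2020-10-01T00:00:00"
+ #     Timedelta:
+ #       days: 1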
+ def get_or_create_collection(catalog, collection_id, data, config, endpoint=None):
+     # Check if collection already in catalog
+     for collection in catalog.get_collections():
+         if collection.id == collection_id:
+             return collection, []
+     # If none found create a new one
+     spatial_extent = [-180.0, -90.0, 180.0, 90.0]
+     if endpoint and endpoint.get("OverwriteBBox"):
+         spatial_extent = endpoint.get("OverwriteBBox")
+     spatial_extent = SpatialExtent(
+         [
+             spatial_extent,
+         ]
+     )
+     times = []
+     temporal_extent = TemporalExtent([[datetime.now(), None]])
+     if endpoint and endpoint.get("Type") == "OverwriteTimes":
+         if endpoint.get("Times"):
+             times = endpoint.get("Times")
+             times_datetimes = sorted([parser.isoparse(time) for time in times])
+             temporal_extent = TemporalExtent(
+                 [[times_datetimes[0], times_datetimes[-1]]]
+             )
+         elif endpoint.get("DateTimeInterval"):
+             start = endpoint["DateTimeInterval"].get("Start", "2020-09-01T00:00:00")
+             end = endpoint["DateTimeInterval"].get("End", "2020-10-01T00:00:00")
+             timedelta_config = endpoint["DateTimeInterval"].get(
+                 "Timedelta", {"days": 1}
+             )
+             times = generateDateIsostringsFromInterval(start, end, timedelta_config)
+             times_datetimes = sorted([parser.isoparse(time) for time in times])
+             temporal_extent = TemporalExtent(
+                 [[times_datetimes[0], times_datetimes[-1]]]
+             )
+     extent = Extent(spatial=spatial_extent, temporal=temporal_extent)
+ 
+     # Check if description is a link to a markdown file; default to an empty
+     # string so the Collection can always be created
+     description = ""
+     if "Description" in data:
+         description = data["Description"]
+         if description.endswith((".md", ".MD")):
+             if description.startswith("http"):
+                 # if full absolute path is defined
+                 response = requests.get(description)
+                 if response.status_code == 200:
+                     description = response.text
+                 elif "Subtitle" in data:
+                     print("WARNING: Markdown file could not be fetched")
+                     description = data["Subtitle"]
+             else:
+                 # relative path to assets was given
+                 response = requests.get(
+                     "%s/%s" % (config["assets_endpoint"], description)
+                 )
+                 if response.status_code == 200:
+                     description = response.text
+                 elif "Subtitle" in data:
+                     print("WARNING: Markdown file could not be fetched")
+                     description = data["Subtitle"]
+     elif "Subtitle" in data:
+         # Try to use at least subtitle to fill some information
+         description = data["Subtitle"]
+ 
+     collection = Collection(
+         id=collection_id,
+         title=data["Title"],
+         description=description,
+         stac_extensions=[
+             "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
+             "https://stac-extensions.github.io/example-links/v0.0.1/schema.json",
+             "https://stac-extensions.github.io/scientific/v1.0.0/schema.json",
+         ],
+         extent=extent,
+     )
+     return (collection, times)
+ 
+ 
+ def add_to_catalog(collection, catalog, endpoint, data):
+     # check if already in catalog, if it is do not re-add it
+     # TODO: probably we should add to the catalog only when creating
+     for cat_coll in catalog.get_collections():
+         if cat_coll.id == collection.id:
+             return
+ 
+     link = catalog.add_child(collection)
+     # bubble fields we want to have up to collection link and add them to collection
+     if endpoint and "Type" in endpoint:
+         collection.extra_fields["endpointtype"] = "%s_%s" % (
+             endpoint["Name"],
+             endpoint["Type"],
+         )
+         link.extra_fields["endpointtype"] = "%s_%s" % (
+             endpoint["Name"],
+             endpoint["Type"],
+         )
+     elif endpoint:
+         collection.extra_fields["endpointtype"] = endpoint["Name"]
+         link.extra_fields["endpointtype"] = endpoint["Name"]
+     # Disabling bubbling up of the description: it is now considered to be
+     # markdown, and loading it would increase the catalog size unnecessarily
+     # link.extra_fields["description"] = collection.description
+     if "Subtitle" in data:
+         link.extra_fields["subtitle"] = data["Subtitle"]
+     link.extra_fields["title"] = collection.title
+     link.extra_fields["code"] = data["EodashIdentifier"]
+     link.extra_fields["id"] = data["Name"]
+     # Check for summaries and bubble up info
+     if collection.summaries.lists:
+         for summary in collection.summaries.lists:
+             link.extra_fields[summary] = collection.summaries.lists[summary]
+ 
+     add_extra_fields(link, data)
+     return link
+ 
+ 
+ def add_extra_fields(stac_object, data):
+     if "yAxis" in data:
+         stac_object.extra_fields["yAxis"] = data["yAxis"]
+     if "Themes" in data:
+         stac_object.extra_fields["themes"] = data["Themes"]
+     if "Locations" in data or "Subcollections" in data:
+         stac_object.extra_fields["locations"] = True
+     if "Tags" in data:
+         stac_object.extra_fields["tags"] = data["Tags"]
+     if "Satellite" in data:
+         stac_object.extra_fields["satellite"] = data["Satellite"]
+     if "Sensor" in data:
+         stac_object.extra_fields["sensor"] = data["Sensor"]
+     if "Agency" in data:
+         stac_object.extra_fields["agency"] = data["Agency"]
+     if "DataSource" in data:
+         if "Spaceborne" in data["DataSource"]:
+             if "Sensor" in data["DataSource"]["Spaceborne"]:
+                 stac_object.extra_fields["sensor"] = data["DataSource"]["Spaceborne"][
+                     "Sensor"
+                 ]
+             if "Satellite" in data["DataSource"]["Spaceborne"]:
+                 stac_object.extra_fields["satellite"] = data["DataSource"][
+                     "Spaceborne"
+                 ]["Satellite"]
+         if "InSitu" in data["DataSource"]:
+             stac_object.extra_fields["insituSources"] = data["DataSource"]["InSitu"]
+         if "Other" in data["DataSource"]:
+             stac_object.extra_fields["otherSources"] = data["DataSource"]["Other"]
+ 
+ 
+ def handle_GeoDB_endpoint(config, endpoint, data, catalog):
+     collection, _ = get_or_create_collection(
+         catalog, endpoint["CollectionId"], data, config, endpoint
+     )
+     select = "?select=aoi,aoi_id,country,city,time"
+     url = (
+         endpoint["EndPoint"]
+         + endpoint["Database"]
+         + "_%s" % endpoint["CollectionId"]
+         + select
+     )
+     if additional_query_parameters := endpoint.get("AdditionalQueryString"):
+         url += f"&{additional_query_parameters}"
+     response = json.loads(requests.get(url).text)
+ 
+     # Sort locations by key
+     sorted_locations = sorted(response, key=itemgetter("aoi_id"))
+     cities = []
+     countries = []
+     for key, value in groupby(sorted_locations, key=itemgetter("aoi_id")):
+         # Finding min and max values for date
+         values = list(value)
+         times = [datetime.fromisoformat(t["time"]) for t in values]
+         unique_values = list({v["aoi_id"]: v for v in values}.values())[0]
+         country = unique_values["country"]
+         city = unique_values["city"]
+         IdKey = endpoint.get("IdKey", "city")
+         IdValue = unique_values[IdKey]
+         if country not in countries:
+             countries.append(country)
+         # sanitize unique key identifier to be sure it is saveable as a filename
+         if IdValue is not None:
+             IdValue = "".join(
+                 [c for c in IdValue if c.isalpha() or c.isdigit() or c == " "]
+             ).rstrip()
+         # Additional check to see if unique key name is empty afterwards
+         if IdValue == "" or IdValue is None:
+             # use aoi_id as a fallback unique id instead of configured key
+             IdValue = key
+         if city not in cities:
+             cities.append(city)
+         min_date = min(times)
+         max_date = max(times)
+         latlon = unique_values["aoi"]
+         lat, lon = [float(x) for x in latlon.split(",")]
+         # create item for unique locations
+         buff = 0.01
+         bbox = [lon - buff, lat - buff, lon + buff, lat + buff]
+         item = Item(
+             id=IdValue,
+             bbox=bbox,
+             properties={},
+             geometry=create_geojson_point(lon, lat),
+             datetime=None,
+             start_datetime=min_date,
+             end_datetime=max_date,
+         )
+         link = collection.add_item(item)
+         # bubble up information we want to the link
+         link.extra_fields["id"] = key
+         link.extra_fields["latlng"] = latlon
+         link.extra_fields["country"] = country
+         link.extra_fields["city"] = city
+ 
+     if "yAxis" not in data:
+         # fetch yAxis and store it to data, preventing need to save it per dataset in yml
+         select = "?select=y_axis&limit=1"
+         url = (
+             endpoint["EndPoint"]
+             + endpoint["Database"]
+             + "_%s" % endpoint["CollectionId"]
+             + select
+         )
+         response = json.loads(requests.get(url).text)
+         yAxis = response[0]["y_axis"]
+         data["yAxis"] = yAxis
+     add_collection_information(config, collection, data)
+     add_example_info(collection, data, endpoint, config)
+ 
+     collection.update_extent_from_items()
+     collection.summaries = Summaries(
+         {
+             "cities": cities,
+             "countries": countries,
+         }
+     )
+     return collection
+ 
+ 
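+ # handle_STAC_based_endpoint harvests items from a STAC API. If the collection
+ # configuration lists "Locations", one sub-collection per location is created
+ # (filtered by the location Bbox and optional FilterDates) and attached to a
+ # root collection; otherwise a single collection is harvested directly.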
+ def handle_STAC_based_endpoint(config, endpoint, data, catalog, headers=None, options=None):
+     if "Locations" in data:
+         root_collection, _ = get_or_create_collection(
+             catalog, data["Name"], data, config, endpoint
+         )
+         for location in data["Locations"]:
+             if "FilterDates" in location:
+                 collection = process_STACAPI_Endpoint(
+                     config=config,
+                     endpoint=endpoint,
+                     data=data,
+                     catalog=catalog,
+                     headers=headers,
+                     bbox=",".join(map(str, location["Bbox"])),
+                     filter_dates=location["FilterDates"],
+                     root_collection=root_collection,
+                     options=options,
+                 )
+             else:
+                 collection = process_STACAPI_Endpoint(
+                     config=config,
+                     endpoint=endpoint,
+                     data=data,
+                     catalog=catalog,
+                     headers=headers,
+                     bbox=",".join(map(str, location["Bbox"])),
+                     root_collection=root_collection,
+                     options=options,
+                 )
+             # Update identifier to use location as well as title
+             # TODO: should we use the name as id? it provides much more
+             # information in the clients
+             collection.id = location["Identifier"]
+             collection.title = location["Name"]
+             # See if description should be overwritten
+             if "Description" in location:
+                 collection.description = location["Description"]
+             else:
+                 collection.description = location["Name"]
+             # TODO: should we remove all assets from sub collections?
+             link = root_collection.add_child(collection)
+             latlng = "%s,%s" % (location["Point"][1], location["Point"][0])
+             # Add extra properties we need
+             link.extra_fields["id"] = location["Identifier"]
+             link.extra_fields["latlng"] = latlng
+             link.extra_fields["name"] = location["Name"]
+             add_example_info(collection, data, endpoint, config)
+             if "OverwriteBBox" in location:
+                 collection.extent.spatial = SpatialExtent(
+                     [
+                         location["OverwriteBBox"],
+                     ]
+                 )
+         root_collection.update_extent_from_items()
+         # Add bbox extents from children
+         for c_child in root_collection.get_children():
+             root_collection.extent.spatial.bboxes.append(
+                 c_child.extent.spatial.bboxes[0]
+             )
+     else:
+         if "Bbox" in endpoint:
+             root_collection = process_STACAPI_Endpoint(
+                 config=config,
+                 endpoint=endpoint,
+                 data=data,
+                 catalog=catalog,
+                 headers=headers,
+                 bbox=",".join(map(str, endpoint["Bbox"])),
+                 options=options,
+             )
+         else:
+             root_collection = process_STACAPI_Endpoint(
+                 config=config,
+                 endpoint=endpoint,
+                 data=data,
+                 catalog=catalog,
+                 headers=headers,
+                 options=options,
+             )
+ 
+     add_example_info(root_collection, data, endpoint, config)
+     add_to_catalog(root_collection, catalog, endpoint, data)
+ 
+ 
+ def add_example_info(stac_object, data, endpoint, config):
+     if "Services" in data:
+         for service in data["Services"]:
+             if service["Name"] == "Statistical API":
+                 service_type = "byoc" if "Type" not in service else service["Type"]
+                 stac_object.add_link(
+                     Link(
+                         rel="example",
+                         target="%s/%s" % (config["assets_endpoint"], service["Script"]),
+                         title="evalscript",
+                         media_type="application/javascript",
+                         extra_fields={
+                             "example:language": "JavaScript",
+                             "dataId": "%s-%s" % (service_type, service["CollectionId"]),
+                         },
+                     )
+                 )
+             if service["Name"] == "VEDA Statistics":
+                 stac_object.add_link(
+                     Link(
+                         rel="example",
+                         target=service["Endpoint"],
+                         title=service["Name"],
+                         media_type="application/json",
+                         extra_fields={
+                             "example:language": "JSON",
+                         },
+                     )
+                 )
+             if service["Name"] == "EOxHub Notebook":
+                 # TODO: we need to consider if we can improve information added
+                 stac_object.add_link(
+                     Link(
+                         rel="example",
+                         target=service["Url"],
+                         title=(
+                             service["Title"] if "Title" in service else service["Name"]
+                         ),
+                         media_type="application/x-ipynb+json",
+                         extra_fields={
+                             "example:language": "Jupyter Notebook",
+                             "example:container": True,
+                         },
+                     )
+                 )
+     elif "Resources" in data:
+         for service in data["Resources"]:
+             if service.get("Name") == "xcube":
+                 target_url = "%s/timeseries/%s/%s?aggMethods=median" % (
+                     endpoint["EndPoint"],
+                     endpoint["DatacubeId"],
+                     endpoint["Variable"],
+                 )
+                 stac_object.add_link(
+                     Link(
+                         rel="example",
+                         target=target_url,
+                         title=service["Name"] + " analytics",
+                         media_type="application/json",
+                         extra_fields={
+                             "example:language": "JSON",
+                             "example:method": "POST",
+                         },
+                     )
+                 )
+ 
+ 
+ def generate_veda_cog_link(endpoint, file_url):
+     bidx = ""
+     if "Bidx" in endpoint:
+         # Check if an array was provided
+         if hasattr(endpoint["Bidx"], "__len__"):
+             for band in endpoint["Bidx"]:
+                 bidx = bidx + "&bidx=%s" % (band)
+         else:
+             bidx = "&bidx=%s" % (endpoint["Bidx"])
+ 
+     colormap = ""
+     if "Colormap" in endpoint:
+         colormap = "&colormap=%s" % (endpoint["Colormap"])
+         # TODO: For now we assume an already url-encoded colormap definition;
+         # it could be nice to allow a json and convert it on the fly
+         # colormap = "&colormap=%s" % (urllib.parse.quote(str(endpoint["Colormap"])))
+ 
+     colormap_name = ""
+     if "ColormapName" in endpoint:
+         colormap_name = "&colormap_name=%s" % (endpoint["ColormapName"])
+ 
+     rescale = ""
+     if "Rescale" in endpoint:
+         rescale = "&rescale=%s,%s" % (endpoint["Rescale"][0], endpoint["Rescale"][1])
+ 
+     if file_url:
+         file_url = "url=%s&" % (file_url)
+     else:
+         file_url = ""
+ 
+     target_url = (
+         "https://staging-raster.delta-backend.com/cog/tiles/WebMercatorQuad/{z}/{x}/{y}?%sresampling_method=nearest%s%s%s%s"
+         % (
+             file_url,
+             bidx,
+             colormap,
+             colormap_name,
+             rescale,
+         )
+     )
+     return target_url
+ 
+ 
+ def generate_veda_tiles_link(endpoint, item):
+     collection = "collection=%s" % endpoint["CollectionId"]
+     assets = ""
+     for asset in endpoint["Assets"]:
+         assets += "&assets=%s" % asset
+     color_formula = ""
+     if "ColorFormula" in endpoint:
+         color_formula = "&color_formula=%s" % endpoint["ColorFormula"]
+     no_data = ""
+     if "NoData" in endpoint:
+         no_data = "&no_data=%s" % endpoint["NoData"]
+     if item:
+         item = "&item=%s" % (item)
+     else:
+         item = ""
+     target_url = (
+         "https://staging-raster.delta-backend.com/stac/tiles/WebMercatorQuad/{z}/{x}/{y}?%s%s%s%s%s"
+         % (
+             collection,
+             item,
+             assets,
+             color_formula,
+             no_data,
+         )
+     )
+     return target_url
+ 
+ 
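+ # add_visualization_info attaches links following the STAC web-map-links
+ # extension (rel "wms", "wmts" or "xyz"; the schema is declared in the
+ # collection's stac_extensions in get_or_create_collection) so clients know
+ # how to visualize the data for each supported endpoint type.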
+ def add_visualization_info(stac_object, data, endpoint, file_url=None, time=None):
+     # add extension reference
+     if endpoint["Name"] == "Sentinel Hub" or endpoint["Name"] == "Sentinel Hub WMS":
+         instanceId = os.getenv("SH_INSTANCE_ID")
+         if "InstanceId" in endpoint:
+             instanceId = endpoint["InstanceId"]
+         extra_fields = {
+             "wms:layers": [endpoint["LayerId"]],
+         }
+         if time is not None:
+             if endpoint["Name"] == "Sentinel Hub WMS":
+                 # SH WMS for public collections needs time interval, we use full day here
+                 datetime_object = datetime.strptime(time, "%Y-%m-%d")
+                 extra_fields["wms:dimensions"] = {
+                     "TIME": "%s/%s"
+                     % (
+                         datetime_object.isoformat(),
+                         (
+                             datetime_object
+                             + timedelta(days=1)
+                             - timedelta(milliseconds=1)
+                         ).isoformat(),
+                     )
+                 }
+             if endpoint["Name"] == "Sentinel Hub":
+                 extra_fields["wms:dimensions"] = {"TIME": time}
+         stac_object.add_link(
+             Link(
+                 rel="wms",
+                 target="https://services.sentinel-hub.com/ogc/wms/%s" % (instanceId),
+                 media_type="text/xml",
+                 title=data["Name"],
+                 extra_fields=extra_fields,
+             )
+         )
+     # elif resource["Name"] == "GeoDB":
+     #     pass
+     elif endpoint["Name"] == "WMS":
+         extra_fields = {"wms:layers": [endpoint["LayerId"]]}
+         if time is not None:
+             extra_fields["wms:dimensions"] = {
+                 "TIME": time,
+             }
+         if "Styles" in endpoint:
+             extra_fields["wms:styles"] = endpoint["Styles"]
+         media_type = "image/jpeg"
+         if "MediaType" in endpoint:
+             media_type = endpoint["MediaType"]
+         stac_object.add_link(
+             Link(
+                 rel="wms",
+                 target=endpoint["EndPoint"],
+                 media_type=media_type,
+                 title=data["Name"],
+                 extra_fields=extra_fields,
+             )
+         )
+     elif endpoint["Name"] == "xcube":
+         if endpoint["Type"] == "zarr":
+             # either preset ColormapName or left as a template
+             cbar = endpoint.get("ColormapName", "{cbar}")
+             # either preset Rescale or left as a template
+             vmin = "{vmin}"
+             vmax = "{vmax}"
+             if "Rescale" in endpoint:
+                 vmin = endpoint["Rescale"][0]
+                 vmax = endpoint["Rescale"][1]
+             crs = endpoint.get("Crs", "EPSG:3857")
+             target_url = (
+                 "%s/tiles/%s/%s/{z}/{y}/{x}?crs=%s&time={time}&vmin=%s&vmax=%s&cbar=%s"
+                 % (
+                     endpoint["EndPoint"],
+                     endpoint["DatacubeId"],
+                     endpoint["Variable"],
+                     crs,
+                     vmin,
+                     vmax,
+                     cbar,
+                 )
+             )
+             stac_object.add_link(
+                 Link(
+                     rel="xyz",
+                     target=target_url,
+                     media_type="image/png",
+                     title="xcube tiles",
+                 )
+             )
+         elif endpoint["Type"] == "WMTSCapabilities":
+             target_url = "%s" % (endpoint.get("EndPoint"),)
+             extra_fields = {"wmts:layer": endpoint.get("LayerId")}
+             dimensions = {}
+             if time is not None:
+                 dimensions["time"] = time
+             if dimensions_config := endpoint.get("Dimensions", {}):
+                 for key, value in dimensions_config.items():
+                     dimensions[key] = value
+             if dimensions:
+                 extra_fields["wmts:dimensions"] = dimensions
+             stac_object.add_link(
+                 Link(
+                     rel="wmts",
+                     target=target_url,
+                     media_type="image/png",
+                     title="wmts capabilities",
+                     extra_fields=extra_fields,
+                 )
+             )
+     elif endpoint["Name"] == "VEDA":
+         target_url = None
+         if endpoint["Type"] == "cog":
+             target_url = generate_veda_cog_link(endpoint, file_url)
+         elif endpoint["Type"] == "tiles":
+             target_url = generate_veda_tiles_link(endpoint, file_url)
+         if target_url:
+             stac_object.add_link(
+                 Link(
+                     rel="xyz",
+                     target=target_url,
+                     media_type="image/png",
+                     title=data["Name"],
+                 )
+             )
+     elif endpoint["Name"] == "GeoDB Vector Tiles":
+         # `${geoserverUrl}${config.layerName}@EPSG%3A${projString}@pbf/{z}/{x}/{-y}.pbf`,
+         # 'geodb_debd884d-92f9-4979-87b6-eadef1139394:GTIF_AT_Gemeinden_3857'
+         target_url = "%s%s:%s_%s@EPSG:3857@pbf/{z}/{x}/{-y}.pbf" % (
+             endpoint["EndPoint"],
+             endpoint["Instance"],
+             endpoint["Database"],
+             endpoint["CollectionId"],
+         )
+         stac_object.add_link(
+             Link(
+                 rel="xyz",
+                 target=target_url,
+                 media_type="application/pbf",
+                 title=data["Name"],
+                 extra_fields={
+                     "description": data["Title"],
+                     "parameters": endpoint["Parameters"],
+                     "matchKey": endpoint["MatchKey"],
+                     "timeKey": endpoint["TimeKey"],
+                     "source": endpoint["Source"],
+                 },
+             )
+         )
+     else:
+         print("Visualization endpoint not supported")
+ 
+ 
+ def process_STACAPI_Endpoint(
+     config,
+     endpoint,
+     data,
+     catalog,
+     headers=None,
+     bbox=None,
+     root_collection=None,
+     filter_dates=None,
+     options=None,
+ ):
+     collection, _ = get_or_create_collection(
+         catalog, endpoint["CollectionId"], data, config, endpoint
+     )
+     add_visualization_info(collection, data, endpoint)
+ 
+     api = Client.open(endpoint["EndPoint"], headers=headers)
+     if bbox is None:
+         bbox = "-180,-90,180,90"
+     results = api.search(
+         collections=[endpoint["CollectionId"]],
+         bbox=bbox,
+         datetime=["1900-01-01T00:00:00Z", "3000-01-01T00:00:00Z"],
+     )
+     # We keep track of potential duplicate times in this dict
+     added_times = {}
+     for item in results.items():
+         item_datetime = item.get_datetime()
+         if item_datetime is not None:
+             iso_date = item_datetime.isoformat()[:10]
+             # if filterdates has been specified skip dates not listed in config
+             if filter_dates and iso_date not in filter_dates:
+                 continue
+             if iso_date in added_times:
+                 continue
+             added_times[iso_date] = True
+         link = collection.add_item(item)
+         if options and options.tn:
+             if "cog_default" in item.assets:
+                 generate_thumbnail(
+                     item, data, endpoint, item.assets["cog_default"].href
+                 )
+             else:
+                 generate_thumbnail(item, data, endpoint)
+         # Check if we can create visualization link
+         if "Assets" in endpoint:
+             add_visualization_info(item, data, endpoint, item.id)
+             link.extra_fields["item"] = item.id
+         elif "cog_default" in item.assets:
+             add_visualization_info(
+                 item, data, endpoint, item.assets["cog_default"].href
+             )
+             link.extra_fields["cog_href"] = item.assets["cog_default"].href
+         elif item_datetime:
+             time_string = item_datetime.isoformat()[:-6] + "Z"
+             add_visualization_info(item, data, endpoint, time=time_string)
+         elif "start_datetime" in item.properties and "end_datetime" in item.properties:
+             add_visualization_info(
+                 item,
+                 data,
+                 endpoint,
+                 time="%s/%s"
+                 % (item.properties["start_datetime"], item.properties["end_datetime"]),
+             )
+         # If a root collection exists we point back to it from the item
+         if root_collection is not None:
+             item.set_collection(root_collection)
+ 
+         # bubble up information we want to the link
+         # it is possible for datetime to be null, if it is start and end datetime have to exist
+         if item_datetime:
+             iso_time = item_datetime.isoformat()[:-6] + "Z"
+             if endpoint["Name"] == "Sentinel Hub":
+                 # for SH WMS we only save the date (no time)
+                 link.extra_fields["datetime"] = iso_date
+             else:
+                 link.extra_fields["datetime"] = iso_time
+         else:
+             link.extra_fields["start_datetime"] = item.properties["start_datetime"]
+             link.extra_fields["end_datetime"] = item.properties["end_datetime"]
+ 
+     collection.update_extent_from_items()
+ 
+     # replace SH identifier with catalog identifier
+     collection.id = data["Name"]
+     add_collection_information(config, collection, data)
+ 
+     # Check if we need to overwrite the bbox after update from items
+     if "OverwriteBBox" in endpoint:
+         collection.extent.spatial = SpatialExtent(
+             [
+                 endpoint["OverwriteBBox"],
+             ]
+         )
+ 
+     return collection
+ 
+ 
+ def fetch_and_save_thumbnail(data, url):
+     collection_path = "../thumbnails/%s_%s/" % (data["EodashIdentifier"], data["Name"])
+     Path(collection_path).mkdir(parents=True, exist_ok=True)
+     image_path = "%s/thumbnail.png" % (collection_path)
+     if not os.path.exists(image_path):
+         image = requests.get(url).content
+         with open(image_path, "wb") as f:
+             f.write(image)
+ 
+ 
+ def generate_thumbnail(
+     stac_object, data, endpoint, file_url=None, time=None, styles=None
+ ):
+     if endpoint["Name"] == "Sentinel Hub" or endpoint["Name"] == "WMS":
+         instanceId = os.getenv("SH_INSTANCE_ID")
+         if "InstanceId" in endpoint:
+             instanceId = endpoint["InstanceId"]
+         # Build example url
+         wms_config = (
+             "REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image/png&STYLES=&TRANSPARENT=true"
+         )
+         bbox = "%s,%s,%s,%s" % (
+             stac_object.bbox[1],
+             stac_object.bbox[0],
+             stac_object.bbox[3],
+             stac_object.bbox[2],
+         )
+         output_format = (
+             "format=image/png&WIDTH=256&HEIGHT=128&CRS=EPSG:4326&BBOX=%s" % (bbox)
+         )
+         item_datetime = stac_object.get_datetime()
+         # it is possible for datetime to be null, if it is start and end datetime have to exist
+         if item_datetime:
+             time = item_datetime.isoformat()[:-6] + "Z"
+         url = "https://services.sentinel-hub.com/ogc/wms/%s?%s&layers=%s&time=%s&%s" % (
+             instanceId,
+             wms_config,
+             endpoint["LayerId"],
+             time,
+             output_format,
+         )
+         fetch_and_save_thumbnail(data, url)
+     elif endpoint["Name"] == "VEDA":
+         target_url = generate_veda_cog_link(endpoint, file_url)
+         # set to get the 0/0/0 tile
+         url = re.sub(r"\{.\}", "0", target_url)
+         fetch_and_save_thumbnail(data, url)
+ 
+ 
+ def process_STAC_Datacube_Endpoint(config, endpoint, data, catalog):
+     collection, _ = get_or_create_collection(
+         catalog, data["Name"], data, config, endpoint
+     )
+     add_visualization_info(collection, data, endpoint)
+ 
+     stac_endpoint_url = endpoint["EndPoint"]
+     if endpoint.get("Name") == "xcube":
+         stac_endpoint_url = stac_endpoint_url + endpoint.get("StacEndpoint", "")
+     # assuming /search not implemented
+     api = Client.open(stac_endpoint_url)
+     coll = api.get_collection(endpoint.get("CollectionId", "datacubes"))
+     item = coll.get_item(endpoint.get("DatacubeId"))
+     # slice a datacube along temporal axis to individual items, selectively adding properties
+     dimensions = item.properties.get("cube:dimensions", {})
+     variables = item.properties.get("cube:variables", {})
+     if endpoint.get("Variable") not in variables:
+         raise Exception(
+             f'Variable {endpoint.get("Variable")} not found in datacube {variables}'
+         )
+     time_dimension = "time"
+     for k, v in dimensions.items():
+         if v.get("type") == "temporal":
+             time_dimension = k
+             break
+     time_entries = dimensions.get(time_dimension).get("values")
+     for t in time_entries:
+         new_item = Item(
+             id=t,
+             bbox=item.bbox,
+             properties={},
+             geometry=item.geometry,
+             datetime=parser.isoparse(t),
+         )
+         link = collection.add_item(new_item)
+         link.extra_fields["datetime"] = t
+         # bubble up information we want to the link
+         item_datetime = new_item.get_datetime()
+         # it is possible for datetime to be null, if it is start and end datetime have to exist
+         if item_datetime:
+             link.extra_fields["datetime"] = item_datetime.isoformat()[:-6] + "Z"
+         else:
+             link.extra_fields["start_datetime"] = new_item.properties["start_datetime"]
+             link.extra_fields["end_datetime"] = new_item.properties["end_datetime"]
+     unit = variables.get(endpoint.get("Variable")).get("unit")
+     if unit and "yAxis" not in data:
+         data["yAxis"] = unit
+     collection.update_extent_from_items()
+ 
+     add_collection_information(config, collection, data)
+ 
+     return collection
+ 
+ 
+ def add_collection_information(config, collection, data):
+     # Add metadata information
+     # Check license identifier
+     if "License" in data:
+         # Check if list was provided
+         if isinstance(data["License"], list):
+             if len(data["License"]) == 1:
+                 collection.license = "proprietary"
+                 link = Link(
+                     rel="license",
+                     target=data["License"][0]["Url"],
+                     media_type=(
+                         data["License"][0]["Type"]
+                         if "Type" in data["License"][0]
+                         else "text/html"
+                     ),
+                 )
+                 if "Title" in data["License"][0]:
+                     link.title = data["License"][0]["Title"]
+                 collection.links.append(link)
+             elif len(data["License"]) > 1:
+                 collection.license = "various"
+                 for license_entry in data["License"]:
+                     link = Link(
+                         rel="license",
+                         target=license_entry["Url"],
+                         media_type=(
+                             license_entry["Type"]
+                             if "Type" in license_entry
+                             else "text/html"
+                         ),
+                     )
+                     if "Title" in license_entry:
+                         link.title = license_entry["Title"]
+                     collection.links.append(link)
+         else:
+             license_info = lookup.by_id(data["License"])
+             if license_info is not None:
+                 collection.license = license_info.id
+                 if license_info.sources:
+                     # add links to licenses
+                     for source in license_info.sources:
+                         collection.links.append(
+                             Link(
+                                 rel="license",
+                                 target=source,
+                                 media_type="text/html",
+                             )
+                         )
+             else:
+                 # fallback to proprietary
+                 print(
+                     "WARNING: License could not be parsed, falling back to proprietary"
+                 )
+                 collection.license = "proprietary"
+     else:
+         # print("WARNING: No license was provided, falling back to proprietary")
+         pass
+ 
+     if "Provider" in data:
+         try:
+             collection.providers = [
+                 Provider(
+                     # convert information to lower case
+                     **dict((k.lower(), v) for k, v in provider.items())
+                 )
+                 for provider in data["Provider"]
+             ]
+         except Exception:
+             print(
+                 "WARNING: Issue creating provider information for collection: %s"
+                 % collection.id
+             )
+ 
+     if "Citation" in data:
+         if "DOI" in data["Citation"]:
+             collection.extra_fields["sci:doi"] = data["Citation"]["DOI"]
+         if "Citation" in data["Citation"]:
+             collection.extra_fields["sci:citation"] = data["Citation"]["Citation"]
+         if "Publication" in data["Citation"]:
+             collection.extra_fields["sci:publications"] = [
+                 # convert keys to lower case
+                 dict((k.lower(), v) for k, v in publication.items())
+                 for publication in data["Citation"]["Publication"]
+             ]
+ 
+     if "Subtitle" in data:
+         collection.extra_fields["subtitle"] = data["Subtitle"]
+     if "Legend" in data:
+         collection.add_asset(
+             "legend",
+             Asset(
+                 href="%s/%s" % (config["assets_endpoint"], data["Legend"]),
+                 media_type="image/png",
+                 roles=["metadata"],
+             ),
+         )
+     if "Story" in data:
+         collection.add_asset(
+             "story",
+             Asset(
+                 href="%s/%s" % (config["assets_endpoint"], data["Story"]),
+                 media_type="text/markdown",
+                 roles=["metadata"],
+             ),
+         )
+     if "Image" in data:
+         collection.add_asset(
+             "thumbnail",
+             Asset(
+                 href="%s/%s" % (config["assets_endpoint"], data["Image"]),
+                 media_type="image/png",
+                 roles=["thumbnail"],
+             ),
+         )
+     # Add extra fields to collection if available
+     add_extra_fields(collection, data)
+ 
+     if "References" in data:
+         generic_counter = 1
+         for ref in data["References"]:
+             if "Key" in ref:
+                 key = ref["Key"]
+             else:
+                 key = "reference_%s" % generic_counter
+                 generic_counter = generic_counter + 1
+             collection.add_asset(
+                 key,
+                 Asset(
+                     href=ref["Url"],
+                     title=ref["Name"],
+                     media_type=ref["MediaType"] if "MediaType" in ref else "text/html",
+                     roles=["metadata"],
+                 ),
+             )
+ 
+ 
+ @dataclass
+ class Options:
+     catalogspath: str
+     collectionsspath: str
+     outputpath: str
+     vd: bool
+     ni: bool
+     tn: bool
+     collections: List[str]
+ 
+ 
+ @click.command()
+ @click.option(
+     "--catalogspath",
+     "-ctp",
+     help="path to catalog configuration files",
+     default="./catalogs/",
+ )
+ @click.option(
+     "--collectionsspath",
+     "-clp",
+     help="path to collection configuration files",
+     default="./collections/",
+ )
+ @click.option(
+     "--outputpath",
+     "-o",
+     help="path where the generated catalogs will be saved",
+     default="./build/",
+ )
+ @click.option(
+     "-vd",
+     is_flag=True,
+     help="validation flag, if set, validation will be run on generated catalogs",
+ )
+ @click.option(
+     "-ni", is_flag=True, help="no items flag, if set, items will not be saved"
+ )
+ @click.option(
+     "-tn",
+     is_flag=True,
+     help="additionally generate thumbnail images for supported collections",
+ )
+ @click.argument(
+     "collections",
+     nargs=-1,
+ )
+ def process_catalogs(
+     catalogspath, collectionsspath, outputpath, vd, ni, tn, collections
+ ):
+     """STAC generator and harvester:
+     This library goes over configured endpoints extracting as much information
+     as possible and generating a STAC catalog with the information"""
+     options = Options(
+         catalogspath=catalogspath,
+         collectionsspath=collectionsspath,
+         outputpath=outputpath,
+         vd=vd,
+         ni=ni,
+         tn=tn,
+         collections=collections,
+     )
+     tasks = []
+     for file_name in os.listdir(catalogspath):
+         file_path = os.path.join(catalogspath, file_name)
+         if os.path.isfile(file_path):
+             tasks.append(
+                 RaisingThread(target=process_catalog_file, args=(file_path, options))
+             )
+             tasks[-1].start()
+     for task in tasks:
+         task.join()
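
For reference, the command defined above can also be exercised programmatically with click's test runner. This is an illustrative sketch only; the module path `eodash_catalog.generate_indicators` is an assumption and may differ in the actual package layout.

    from click.testing import CliRunner

    # module path assumed for illustration; adjust to wherever process_catalogs lives
    from eodash_catalog.generate_indicators import process_catalogs

    runner = CliRunner()
    # build the full catalog ("all"), skipping item files (-ni)
    result = runner.invoke(process_catalogs, ["-ni", "all"])
    print(result.output)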