eodash_catalog 0.0.6__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of eodash_catalog might be problematic.

@@ -3,74 +3,70 @@
 Indicator generator to harvest information from endpoints and generate catalog
 
 """
+
+import os
 import time
-import requests
-import json
-from pystac_client import Client
 from dataclasses import dataclass
-from typing import List
-import os
-import re
-from pathlib import Path
-from datetime import datetime, timedelta
-from dotenv import load_dotenv
+
+import click
 import yaml
-from yaml.loader import SafeLoader
-from itertools import groupby
-from operator import itemgetter
-from dateutil import parser
-from eodash_catalog.sh_endpoint import get_SH_token
-from eodash_catalog.utils import (
-    create_geojson_point,
-    retrieveExtentFromWMSWMTS,
-    generateDateIsostringsFromInterval,
-    RaisingThread,
-)
+from dotenv import load_dotenv
 from pystac import (
-    Item,
-    Asset,
     Catalog,
-    Link,
     CatalogType,
     Collection,
-    Extent,
-    SpatialExtent,
-    TemporalExtent,
     Summaries,
-    Provider,
 )
 from pystac.layout import TemplateLayoutStrategy
 from pystac.validation import validate_all
-import spdx_lookup as lookup
-import argparse
-
-import click
+from yaml.loader import SafeLoader
 
+from eodash_catalog.endpoints import (
+    handle_collection_only,
+    handle_GeoDB_endpoint,
+    handle_GeoDB_Tiles_endpoint,
+    handle_SH_endpoint,
+    handle_SH_WMS_endpoint,
+    handle_VEDA_endpoint,
+    handle_WMS_endpoint,
+    handle_xcube_endpoint,
+)
+from eodash_catalog.stac_handling import (
+    add_base_overlay_info,
+    add_collection_information,
+    add_extra_fields,
+    get_or_create_collection_and_times,
+)
+from eodash_catalog.utils import (
+    RaisingThread,
+    iter_len_at_least,
+    recursive_save,
+)
 
 # make sure we are loading the env local definition
 load_dotenv()
 
 
-def recursive_save(stac_object, no_items=False):
-    stac_object.save_object()
-    for child in stac_object.get_children():
-        recursive_save(child, no_items)
-    if not no_items:
-        # try to save items if available
-        for item in stac_object.get_items():
-            item.save_object()
+@dataclass
+class Options:
+    catalogspath: str
+    collectionspath: str
+    indicatorspath: str
+    outputpath: str
+    vd: bool
+    ni: bool
+    tn: bool
+    collections: list[str]
 
 
-def process_catalog_file(file_path, options):
+def process_catalog_file(file_path: str, options: Options):
     print("Processing catalog:", file_path)
     with open(file_path) as f:
-        config = yaml.load(f, Loader=SafeLoader)
+        config: dict = yaml.load(f, Loader=SafeLoader)
 
     if len(options.collections) > 0:
         # create only catalogs containing the passed collections
-        process_collections = [
-            c for c in config["collections"] if c in options.collections
-        ]
+        process_collections = [c for c in config["collections"] if c in options.collections]
     elif (len(options.collections) == 1 and options.collections == "all") or len(
         options.collections
     ) == 0:
@@ -86,49 +82,51 @@ def process_catalog_file(file_path, options):
         catalog_type=CatalogType.RELATIVE_PUBLISHED,
     )
     for collection in process_collections:
-        file_path = "%s/%s.yaml" % (options.collectionspath, collection)
+        file_path = f"{options.collectionspath}/{collection}.yaml"
         if os.path.isfile(file_path):
             # if collection file exists process it as indicator
             # collection will be added as single collection to indicator
             process_indicator_file(config, file_path, catalog, options)
         else:
             # if not try to see if indicator definition available
-            file_path = "%s/%s.yaml" % (options.indicatorspath, collection)
+            file_path = f"{options.indicatorspath}/{collection}.yaml"
             if os.path.isfile(file_path):
                 process_indicator_file(
                     config,
-                    "%s/%s.yaml" % (options.indicatorspath, collection),
+                    f"{options.indicatorspath}/{collection}.yaml",
                     catalog,
                     options,
                 )
             else:
-                print(
-                    "Warning: neither collection nor indicator found for %s"
-                    % collection
-                )
+                print(f"Warning: neither collection nor indicator found for {collection}")
 
     strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
-    catalog.normalize_hrefs(
-        "%s/%s" % (options.outputpath, config["id"]), strategy=strategy
-    )
+    # expecting that the catalog will be hosted online, self url should correspond to that
+    # default to a local folder + catalog id in case not set
 
     print("Started creation of collection files")
     start = time.time()
     if options.ni:
+        catalog_self_href = f'{options.outputpath}/{config["id"]}'
+        catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
         recursive_save(catalog, options.ni)
     else:
         # For full catalog save with items this still seems to be faster
-        catalog.save(dest_href="%s/%s" % (options.outputpath, config["id"]))
+        catalog_self_href = config.get(
+            "endpoint", "{}/{}".format(options.outputpath, config["id"])
+        )
+        catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
+        catalog.save(dest_href="{}/{}".format(options.outputpath, config["id"]))
     end = time.time()
     print(f"Catalog {config['id']}: Time consumed in saving: {end - start}")
 
     if options.vd:
         # try to validate catalog if flag was set
-        print("Running validation of catalog %s" % file_path)
+        print(f"Running validation of catalog {file_path}")
         try:
             validate_all(catalog.to_dict(), href=config["endpoint"])
         except Exception as e:
-            print("Issue validation collection: %s" % e)
+            print(f"Issue validation collection: {e}")
 
 
 def extract_indicator_info(parent_collection):
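
A note on the save-path change in the hunk above: the catalog's self href now prefers the configured "endpoint" (the catalog's intended online location) and only falls back to the local output folder plus the catalog id. A minimal standalone sketch of that defaulting, with stand-in values rather than eodash_catalog code:

# Illustrative only: config and outputpath stand in for the values used by
# process_catalog_file; no "endpoint" key is configured here, so the local
# fallback is taken.
config = {"id": "demo-catalog"}
outputpath = "../build"

catalog_self_href = config.get("endpoint", f"{outputpath}/{config['id']}")
assert catalog_self_href == "../build/demo-catalog"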
@@ -155,10 +153,9 @@ def extract_indicator_info(parent_collection):
                 else:
                     summaries[key].add(param)
             # extract also summary information
-            if collection.summaries.lists:
-                if key in collection.summaries.lists:
-                    for p in collection.summaries.lists[key]:
-                        summaries[key].add(p)
+            if collection.summaries.lists and collection.summaries.lists.get(key):
+                for p in collection.summaries.lists[key]:
+                    summaries[key].add(p)
 
     for key in to_extract:
         # convert all items back to a list
@@ -169,23 +166,19 @@ def extract_indicator_info(parent_collection):
     parent_collection.summaries = Summaries(summaries)
 
 
-def iter_len_at_least(i, n):
-    return sum(1 for _ in zip(range(n), i)) == n
-
-
-def process_indicator_file(config, file_path, catalog, options):
+def process_indicator_file(config: dict, file_path: str, catalog: Catalog, options: Options):
     with open(file_path) as f:
         print("Processing indicator:", file_path)
-        data = yaml.load(f, Loader=SafeLoader)
-        parent_indicator, _ = get_or_create_collection(
-            catalog, data["Name"], data, config
+        data: dict = yaml.load(f, Loader=SafeLoader)
+        parent_indicator, _ = get_or_create_collection_and_times(
+            catalog, data["Name"], data, config, {}
         )
         if "Collections" in data:
             for collection in data["Collections"]:
                 process_collection_file(
                     config,
-                    "%s/%s.yaml" % (options.collectionspath, collection),
-                    catalog,
+                    f"{options.collectionspath}/{collection}.yaml",
+                    parent_indicator,
                     options,
                 )
         else:
@@ -196,9 +189,7 @@ def process_indicator_file(config, file_path, catalog, options):
         parent_indicator.update_extent_from_items()
         # Add bbox extents from children
         for c_child in parent_indicator.get_children():
-            parent_indicator.extent.spatial.bboxes.append(
-                c_child.extent.spatial.bboxes[0]
-            )
+            parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
         # extract collection information and add it to summary indicator level
         extract_indicator_info(parent_indicator)
         # add baselayer and overview information to indicator collection
@@ -206,51 +197,54 @@ def process_indicator_file(config, file_path, catalog, options):
     add_to_catalog(parent_indicator, catalog, None, data)
 
 
-def process_collection_file(config, file_path, catalog, options):
+def process_collection_file(
+    config: dict, file_path: str, catalog: Catalog | Collection, options: Options
+):
     print("Processing collection:", file_path)
     with open(file_path) as f:
-        data = yaml.load(f, Loader=SafeLoader)
+        data: dict = yaml.load(f, Loader=SafeLoader)
         if "Resources" in data:
             for resource in data["Resources"]:
                 if "EndPoint" in resource:
+                    collection = None
                     if resource["Name"] == "Sentinel Hub":
-                        handle_SH_endpoint(config, resource, data, catalog, options)
+                        collection = handle_SH_endpoint(config, resource, data, catalog, options)
                     elif resource["Name"] == "Sentinel Hub WMS":
-                        collection = handle_SH_WMS_endpoint(
-                            config, resource, data, catalog
-                        )
+                        collection = handle_SH_WMS_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "GeoDB":
-                        collection = handle_GeoDB_endpoint(
-                            config, resource, data, catalog
-                        )
-                        add_to_catalog(collection, catalog, resource, data)
+                        collection = handle_GeoDB_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "VEDA":
-                        handle_VEDA_endpoint(config, resource, data, catalog, options)
+                        collection = handle_VEDA_endpoint(config, resource, data, catalog, options)
                     elif resource["Name"] == "marinedatastore":
-                        handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
+                        collection = handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
                     elif resource["Name"] == "xcube":
-                        handle_xcube_endpoint(config, resource, data, catalog)
+                        collection = handle_xcube_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "WMS":
-                        handle_WMS_endpoint(config, resource, data, catalog)
+                        collection = handle_WMS_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "GeoDB Vector Tiles":
-                        handle_GeoDB_Tiles_endpoint(config, resource, data, catalog)
+                        collection = handle_GeoDB_Tiles_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "JAXA_WMTS_PALSAR":
                         # somewhat one off creation of individual WMTS layers as individual items
-                        handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
+                        collection = handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
                     elif resource["Name"] == "Collection-only":
-                        handle_collection_only(config, resource, data, catalog)
+                        collection = handle_collection_only(config, resource, data, catalog)
                     else:
                         raise ValueError("Type of Resource is not supported")
+                    if collection is not None:
+                        add_to_catalog(collection, catalog, resource, data)
+                    else:
+                        raise Exception("No collection generated")
         elif "Subcollections" in data:
             # if no endpoint is specified we check for definition of subcollections
-            parent_collection, _ = get_or_create_collection(
-                catalog, data["Name"], data, config
+            parent_collection, _ = get_or_create_collection_and_times(
+                catalog, data["Name"], data, config, {}
             )
 
             locations = []
             countries = []
             for sub_coll_def in data["Subcollections"]:
-                # Subcollection has only data on one location which is defined for the entire collection
+                # Subcollection has only data on one location which
+                # is defined for the entire collection
                 if "Name" in sub_coll_def and "Point" in sub_coll_def:
                     locations.append(sub_coll_def["Name"])
                     if isinstance(sub_coll_def["Country"], list):
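
The dispatch rewrite in the hunk above is the core of this refactor: every endpoint handler now returns the collection it built (or None), and process_collection_file calls add_to_catalog exactly once after the if/elif chain, where previously only the GeoDB branch added its collection itself. A toy sketch of the pattern, using stand-in handler names rather than the real eodash_catalog.endpoints functions:

from typing import Callable, Optional

def handle_wms(resource: dict) -> Optional[str]:
    # stand-in handler: returns the collection it built
    return f"collection-for-{resource['LayerId']}"

HANDLERS: dict[str, Callable[[dict], Optional[str]]] = {"WMS": handle_wms}

def process_resource(resource: dict) -> str:
    handler = HANDLERS.get(resource["Name"])
    if handler is None:
        raise ValueError("Type of Resource is not supported")
    collection = handler(resource)
    if collection is None:
        raise Exception("No collection generated")
    # the caller would now hand the collection to add_to_catalog exactly once
    return collection

print(process_resource({"Name": "WMS", "LayerId": "demo"}))

A table-driven dispatch is used here only for brevity; the module itself keeps the explicit if/elif chain.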
@@ -259,8 +253,7 @@ def process_collection_file(config, file_path, catalog, options):
                         countries.append(sub_coll_def["Country"])
                     process_collection_file(
                         config,
-                        "%s/%s.yaml"
-                        % (options.collectionspath, sub_coll_def["Collection"]),
+                        "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
                         parent_collection,
                         options,
                     )
@@ -271,7 +264,7 @@ def process_collection_file(config, file_path, catalog, options):
                             and "id" in link.extra_fields
                             and link.extra_fields["id"] == sub_coll_def["Identifier"]
                         ):
-                            latlng = "%s,%s" % (
+                            latlng = "{},{}".format(
                                 sub_coll_def["Point"][1],
                                 sub_coll_def["Point"][0],
                             )
@@ -279,27 +272,21 @@ def process_collection_file(config, file_path, catalog, options):
                             link.extra_fields["latlng"] = latlng
                             link.extra_fields["name"] = sub_coll_def["Name"]
                     # Update title of collection to use location name
-                    sub_collection = parent_collection.get_child(
-                        id=sub_coll_def["Identifier"]
-                    )
+                    sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
                     if sub_collection:
                         sub_collection.title = sub_coll_def["Name"]
-                # The subcollection has multiple locations which need to be extracted and elevated to parent collection level
+                # The subcollection has multiple locations which need to be extracted
+                # and elevated to parent collection level
                 else:
                     # create temp catalog to save collection
-                    tmp_catalog = Catalog(
-                        id="tmp_catalog", description="temp catalog placeholder"
-                    )
+                    tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
                     process_collection_file(
                         config,
-                        "%s/%s.yaml"
-                        % (options.collectionspath, sub_coll_def["Collection"]),
+                        "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
                         tmp_catalog,
                         options,
                     )
-                    links = tmp_catalog.get_child(
-                        sub_coll_def["Identifier"]
-                    ).get_links()
+                    links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links()  # type: ignore
                     for link in links:
                         # extract summary information
                         if "city" in link.extra_fields:
@@ -316,9 +303,7 @@ def process_collection_file(config, file_path, catalog, options):
             parent_collection.update_extent_from_items()
             # Add bbox extents from children
             for c_child in parent_collection.get_children():
-                parent_collection.extent.spatial.bboxes.append(
-                    c_child.extent.spatial.bboxes[0]
-                )
+                parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
             # Fill summaries for locations
             parent_collection.summaries = Summaries(
                 {
@@ -329,219 +314,6 @@ def process_collection_file(config, file_path, catalog, options):
             add_to_catalog(parent_collection, catalog, None, data)
 
 
-def handle_collection_only(config, endpoint, data, catalog):
-    collection, times = get_or_create_collection(
-        catalog, data["Name"], data, config, endpoint
-    )
-    if len(times) > 0 and not endpoint.get("Disable_Items"):
-        for t in times:
-            item = Item(
-                id=t,
-                bbox=endpoint.get("OverwriteBBox"),
-                properties={},
-                geometry=None,
-                datetime=parser.isoparse(t),
-            )
-            link = collection.add_item(item)
-            link.extra_fields["datetime"] = t
-    add_collection_information(config, collection, data)
-    add_to_catalog(collection, catalog, None, data)
-
-
-def handle_WMS_endpoint(config, endpoint, data, catalog, wmts=False):
-    collection, times = get_or_create_collection(
-        catalog, data["Name"], data, config, endpoint
-    )
-    spatial_extent = collection.extent.spatial.to_dict().get(
-        "bbox", [-180, -90, 180, 90]
-    )[0]
-    if not endpoint.get("Type") == "OverwriteTimes" or not endpoint.get(
-        "OverwriteBBox"
-    ):
-        # some endpoints allow "narrowed-down" capabilities per-layer, which we utilize to not
-        # have to process full service capabilities XML
-        capabilities_url = endpoint["EndPoint"]
-        spatial_extent, times = retrieveExtentFromWMSWMTS(
-            capabilities_url, endpoint["LayerId"], wmts=wmts
-        )
-    # Create an item per time to allow visualization in stac clients
-    if len(times) > 0 and not endpoint.get("Disable_Items"):
-        for t in times:
-            item = Item(
-                id=t,
-                bbox=spatial_extent,
-                properties={},
-                geometry=None,
-                datetime=parser.isoparse(t),
-                stac_extensions=[
-                    "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
-                ],
-            )
-            add_visualization_info(item, data, endpoint, time=t)
-            link = collection.add_item(item)
-            link.extra_fields["datetime"] = t
-        collection.update_extent_from_items()
-
-    # Check if we should overwrite bbox
-    if "OverwriteBBox" in endpoint:
-        collection.extent.spatial = SpatialExtent(
-            [
-                endpoint["OverwriteBBox"],
-            ]
-        )
-    add_collection_information(config, collection, data)
-    add_to_catalog(collection, catalog, endpoint, data)
-
-
-def handle_SH_endpoint(config, endpoint, data, catalog, options):
-    token = get_SH_token()
-    headers = {"Authorization": "Bearer %s" % token}
-    endpoint["EndPoint"] = "https://services.sentinel-hub.com/api/v1/catalog/1.0.0/"
-    # Overwrite collection id with type, such as ZARR or BYOC
-    if "Type" in endpoint:
-        endpoint["CollectionId"] = endpoint["Type"] + "-" + endpoint["CollectionId"]
-    handle_STAC_based_endpoint(config, endpoint, data, catalog, options, headers)
-
-
-def handle_SH_WMS_endpoint(config, endpoint, data, catalog):
-    # create collection and subcollections (based on locations)
-    if "Locations" in data:
-        root_collection, _ = get_or_create_collection(
-            catalog, data["Name"], data, config, endpoint
-        )
-        for location in data["Locations"]:
-            # create and populate location collections based on times
-            # TODO: Should we add some new description per location?
-            location_config = {
-                "Title": location["Name"],
-                "Description": "",
-            }
-            collection, _ = get_or_create_collection(
-                catalog, location["Identifier"], location_config, config, endpoint
-            )
-            collection.extra_fields["endpointtype"] = endpoint["Name"]
-            for time in location["Times"]:
-                item = Item(
-                    id=time,
-                    bbox=location["Bbox"],
-                    properties={},
-                    geometry=None,
-                    datetime=parser.isoparse(time),
-                    stac_extensions=[
-                        "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
-                    ],
-                )
-                add_visualization_info(item, data, endpoint, time=time)
-                item_link = collection.add_item(item)
-                item_link.extra_fields["datetime"] = time
-
-            link = root_collection.add_child(collection)
-            # bubble up information we want to the link
-            latlng = "%s,%s" % (location["Point"][1], location["Point"][0])
-            link.extra_fields["id"] = location["Identifier"]
-            link.extra_fields["latlng"] = latlng
-            link.extra_fields["country"] = location["Country"]
-            link.extra_fields["city"] = location["Name"]
-            collection.update_extent_from_items()
-            add_visualization_info(collection, data, endpoint)
-
-        root_collection.update_extent_from_items()
-        # Add bbox extents from children
-        for c_child in root_collection.get_children():
-            root_collection.extent.spatial.bboxes.append(
-                c_child.extent.spatial.bboxes[0]
-            )
-    add_to_catalog(root_collection, catalog, endpoint, data)
-    return root_collection
-
-
-def handle_VEDA_endpoint(config, endpoint, data, catalog, options):
-    handle_STAC_based_endpoint(config, endpoint, data, catalog, options)
-
-
-def handle_xcube_endpoint(config, endpoint, data, catalog):
-    root_collection = process_STAC_Datacube_Endpoint(
-        config=config,
-        endpoint=endpoint,
-        data=data,
-        catalog=catalog,
-    )
-
-    add_example_info(root_collection, data, endpoint, config)
-    add_to_catalog(root_collection, catalog, endpoint, data)
-
-
-def get_or_create_collection(catalog, collection_id, data, config, endpoint=None):
-    # Check if collection already in catalog
-    for collection in catalog.get_collections():
-        if collection.id == collection_id:
-            return collection, []
-    # If none found create a new one
-    spatial_extent = [-180.0, -90.0, 180.0, 90.0]
-    if endpoint and endpoint.get("OverwriteBBox"):
-        spatial_extent = endpoint.get("OverwriteBBox")
-    spatial_extent = SpatialExtent(
-        [
-            spatial_extent,
-        ]
-    )
-    times = []
-    temporal_extent = TemporalExtent([[datetime.now(), None]])
-    if endpoint and endpoint.get("Type") == "OverwriteTimes":
-        if endpoint.get("Times"):
-            times = endpoint.get("Times")
-            times_datetimes = sorted([parser.isoparse(time) for time in times])
-            temporal_extent = TemporalExtent(
-                [[times_datetimes[0], times_datetimes[-1]]]
-            )
-        elif endpoint.get("DateTimeInterval"):
-            start = endpoint["DateTimeInterval"].get("Start", "2020-09-01T00:00:00")
-            end = endpoint["DateTimeInterval"].get("End", "2020-10-01T00:00:00")
-            timedelta_config = endpoint["DateTimeInterval"].get(
-                "Timedelta", {"days": 1}
-            )
-            times = generateDateIsostringsFromInterval(start, end, timedelta_config)
-            times_datetimes = sorted([parser.isoparse(time) for time in times])
-            temporal_extent = TemporalExtent(
-                [[times_datetimes[0], times_datetimes[-1]]]
-            )
-    extent = Extent(spatial=spatial_extent, temporal=temporal_extent)
-
-    # Check if description is link to markdown file
-    if "Description" in data:
-        description = data["Description"]
-        if description.endswith((".md", ".MD")):
-            if description.startswith(("http")):
-                # if full absolute path is defined
-                response = requests.get(description)
-                if response.status_code == 200:
-                    description = response.text
-                elif "Subtitle" in data:
-                    print("WARNING: Markdown file could not be fetched")
-                    description = data["Subtitle"]
-            else:
-                # relative path to assets was given
-                response = requests.get(
-                    "%s/%s" % (config["assets_endpoint"], description)
-                )
-                if response.status_code == 200:
-                    description = response.text
-                elif "Subtitle" in data:
-                    print("WARNING: Markdown file could not be fetched")
-                    description = data["Subtitle"]
-    elif "Subtitle" in data:
-        # Try to use at least subtitle to fill some information
-        description = data["Subtitle"]
-
-    collection = Collection(
-        id=collection_id,
-        title=data["Title"],
-        description=description,
-        extent=extent,
-    )
-    return (collection, times)
-
-
 def add_to_catalog(collection, catalog, endpoint, data):
     # check if already in catalog, if it is do not re-add it
     # TODO: probably we should add to the catalog only when creating
@@ -552,20 +324,17 @@ def add_to_catalog(collection, catalog, endpoint, data):
     link = catalog.add_child(collection)
     # bubble fields we want to have up to collection link and add them to collection
     if endpoint and "Type" in endpoint:
-        collection.extra_fields["endpointtype"] = "%s_%s" % (
+        collection.extra_fields["endpointtype"] = "{}_{}".format(
             endpoint["Name"],
             endpoint["Type"],
         )
-        link.extra_fields["endpointtype"] = "%s_%s" % (
+        link.extra_fields["endpointtype"] = "{}_{}".format(
             endpoint["Name"],
             endpoint["Type"],
         )
     elif endpoint:
         collection.extra_fields["endpointtype"] = endpoint["Name"]
         link.extra_fields["endpointtype"] = endpoint["Name"]
-    # Disabling bubbling up of description as now it is considered to be
-    # used as markdown loading would increase the catalog size unnecessarily
-    # link.extra_fields["description"] = collection.description
     if "Subtitle" in data:
         link.extra_fields["subtitle"] = data["Subtitle"]
     link.extra_fields["title"] = collection.title
@@ -575,934 +344,13 @@ def add_to_catalog(collection, catalog, endpoint, data):
         link.extra_fields["themes"] = data["Themes"]
     # Check for summaries and bubble up info
     if collection.summaries.lists:
-        for sum in collection.summaries.lists:
-            link.extra_fields[sum] = collection.summaries.lists[sum]
+        for summary in collection.summaries.lists:
+            link.extra_fields[summary] = collection.summaries.lists[summary]
 
     add_extra_fields(link, data)
     return link
 
 
-def add_extra_fields(stac_object, data):
-    if "yAxis" in data:
-        stac_object.extra_fields["yAxis"] = data["yAxis"]
-    if "Themes" in data:
-        stac_object.extra_fields["themes"] = data["Themes"]
-    if "Locations" in data or "Subcollections" in data:
-        stac_object.extra_fields["locations"] = True
-    if "Tags" in data:
-        stac_object.extra_fields["tags"] = data["Tags"]
-    if "Satellite" in data:
-        stac_object.extra_fields["satellite"] = data["Satellite"]
-    if "Sensor" in data:
-        stac_object.extra_fields["sensor"] = data["Sensor"]
-    if "Agency" in data:
-        stac_object.extra_fields["agency"] = data["Agency"]
-    if "yAxis" in data:
-        stac_object.extra_fields["yAxis"] = data["yAxis"]
-    if "EodashIdentifier" in data:
-        stac_object.extra_fields["subcode"] = data["EodashIdentifier"]
-    if "DataSource" in data:
-        if "Spaceborne" in data["DataSource"]:
-            if "Sensor" in data["DataSource"]["Spaceborne"]:
-                stac_object.extra_fields["sensor"] = data["DataSource"]["Spaceborne"][
-                    "Sensor"
-                ]
-            if "Satellite" in data["DataSource"]["Spaceborne"]:
-                stac_object.extra_fields["satellite"] = data["DataSource"][
-                    "Spaceborne"
-                ]["Satellite"]
-        if "InSitu" in data["DataSource"]:
-            stac_object.extra_fields["insituSources"] = data["DataSource"]["InSitu"]
-        if "Other" in data["DataSource"]:
-            stac_object.extra_fields["otherSources"] = data["DataSource"]["Other"]
-
-
-def handle_GeoDB_endpoint(config, endpoint, data, catalog):
-    collection, _ = get_or_create_collection(
-        catalog, endpoint["CollectionId"], data, config, endpoint
-    )
-    select = "?select=aoi,aoi_id,country,city,time"
-    url = (
-        endpoint["EndPoint"]
-        + endpoint["Database"]
-        + "_%s" % endpoint["CollectionId"]
-        + select
-    )
-    if additional_query_parameters := endpoint.get("AdditionalQueryString"):
-        url += f"&{additional_query_parameters}"
-    response = json.loads(requests.get(url).text)
-
-    # Sort locations by key
-    sorted_locations = sorted(response, key=itemgetter("aoi_id"))
-    cities = []
-    countries = []
-    for key, value in groupby(sorted_locations, key=itemgetter("aoi_id")):
-        # Finding min and max values for date
-        values = [v for v in value]
-        times = [datetime.fromisoformat(t["time"]) for t in values]
-        unique_values = list({v["aoi_id"]: v for v in values}.values())[0]
-        country = unique_values["country"]
-        city = unique_values["city"]
-        IdKey = endpoint.get("IdKey", "city")
-        IdValue = unique_values[IdKey]
-        if country not in countries:
-            countries.append(country)
-        # sanitize unique key identifier to be sure it is saveable as a filename
-        if IdValue is not None:
-            IdValue = "".join(
-                [c for c in IdValue if c.isalpha() or c.isdigit() or c == " "]
-            ).rstrip()
-        # Additional check to see if unique key name is empty afterwards
-        if IdValue == "" or IdValue is None:
-            # use aoi_id as a fallback unique id instead of configured key
-            IdValue = key
-        if city not in cities:
-            cities.append(city)
-        min_date = min(times)
-        max_date = max(times)
-        latlon = unique_values["aoi"]
-        [lat, lon] = [float(x) for x in latlon.split(",")]
-        # create item for unique locations
-        buff = 0.01
-        bbox = [lon - buff, lat - buff, lon + buff, lat + buff]
-        item = Item(
-            id=IdValue,
-            bbox=bbox,
-            properties={},
-            geometry=create_geojson_point(lon, lat),
-            datetime=None,
-            start_datetime=min_date,
-            end_datetime=max_date,
-        )
-        link = collection.add_item(item)
-        # bubble up information we want to the link
-        link.extra_fields["id"] = key
-        link.extra_fields["latlng"] = latlon
-        link.extra_fields["country"] = country
-        link.extra_fields["city"] = city
-
-    if "yAxis" not in data:
-        # fetch yAxis and store it to data, preventing need to save it per dataset in yml
-        select = "?select=y_axis&limit=1"
-        url = (
-            endpoint["EndPoint"]
-            + endpoint["Database"]
-            + "_%s" % endpoint["CollectionId"]
-            + select
-        )
-        response = json.loads(requests.get(url).text)
-        yAxis = response[0]["y_axis"]
-        data["yAxis"] = yAxis
-    add_collection_information(config, collection, data)
-    add_example_info(collection, data, endpoint, config)
-
-    collection.update_extent_from_items()
-    collection.summaries = Summaries(
-        {
-            "cities": cities,
-            "countries": countries,
-        }
-    )
-    return collection
-
-
-def handle_STAC_based_endpoint(config, endpoint, data, catalog, options, headers=None):
-    if "Locations" in data:
-        root_collection, _ = get_or_create_collection(
-            catalog, data["Name"], data, config, endpoint
-        )
-        for location in data["Locations"]:
-            if "FilterDates" in location:
-                collection = process_STACAPI_Endpoint(
-                    config=config,
-                    endpoint=endpoint,
-                    data=data,
-                    catalog=catalog,
-                    options=options,
-                    headers=headers,
-                    bbox=",".join(map(str, location["Bbox"])),
-                    filter_dates=location["FilterDates"],
-                    root_collection=root_collection,
-                )
-            else:
-                collection = process_STACAPI_Endpoint(
-                    config=config,
-                    endpoint=endpoint,
-                    data=data,
-                    catalog=catalog,
-                    options=options,
-                    headers=headers,
-                    bbox=",".join(map(str, location["Bbox"])),
-                    root_collection=root_collection,
-                )
-            # Update identifier to use location as well as title
-            # TODO: should we use the name as id? it provides much more
-            # information in the clients
-            collection.id = location["Identifier"]
-            collection.title = (location["Name"],)
-            # See if description should be overwritten
-            if "Description" in location:
-                collection.description = location["Description"]
-            else:
-                collection.description = location["Name"]
-            # TODO: should we remove all assets from sub collections?
-            link = root_collection.add_child(collection)
-            latlng = "%s,%s" % (location["Point"][1], location["Point"][0])
-            # Add extra properties we need
-            link.extra_fields["id"] = location["Identifier"]
-            link.extra_fields["latlng"] = latlng
-            link.extra_fields["name"] = location["Name"]
-            add_example_info(collection, data, endpoint, config)
-            if "OverwriteBBox" in location:
-                collection.extent.spatial = SpatialExtent(
-                    [
-                        location["OverwriteBBox"],
-                    ]
-                )
-        root_collection.update_extent_from_items()
-        # Add bbox extents from children
-        for c_child in root_collection.get_children():
-            root_collection.extent.spatial.bboxes.append(
-                c_child.extent.spatial.bboxes[0]
-            )
-    else:
-        if "Bbox" in endpoint:
-            root_collection = process_STACAPI_Endpoint(
-                config=config,
-                endpoint=endpoint,
-                data=data,
-                catalog=catalog,
-                options=options,
-                headers=headers,
-                bbox=",".join(map(str, endpoint["Bbox"])),
-            )
-        else:
-            root_collection = process_STACAPI_Endpoint(
-                config=config,
-                endpoint=endpoint,
-                data=data,
-                catalog=catalog,
-                options=options,
-                headers=headers,
-            )
-
-    add_example_info(root_collection, data, endpoint, config)
-    add_to_catalog(root_collection, catalog, endpoint, data)
-
-
-def add_base_overlay_info(collection, config, data):
-    # check if default base layers defined
-    if "default_base_layers" in config:
-        with open("%s.yaml" % config["default_base_layers"]) as f:
-            base_layers = yaml.load(f, Loader=SafeLoader)
-            for layer in base_layers:
-                collection.add_link(create_web_map_link(layer, role="baselayer"))
-    # check if default overlay layers defined
-    if "default_overlay_layers" in config:
-        with open("%s.yaml" % config["default_overlay_layers"]) as f:
-            overlay_layers = yaml.load(f, Loader=SafeLoader)
-            for layer in overlay_layers:
-                collection.add_link(create_web_map_link(layer, role="overlay"))
-    if "BaseLayers" in data:
-        for layer in data["BaseLayers"]:
-            collection.add_link(create_web_map_link(layer, role="baselayer"))
-    if "OverlayLayers" in data:
-        for layer in data["OverlayLayers"]:
-            collection.add_link(create_web_map_link(layer, role="overlay"))
-    # TODO: possibility to overwrite default base and overlay layers
-
-
-def create_web_map_link(layer, role):
-    extra_fields = {
-        "roles": [role],
-        "id": layer["id"],
-    }
-    if "default" in layer and layer["default"]:
-        extra_fields["roles"].append("default")
-    if "visible" in layer and layer["visible"]:
-        extra_fields["roles"].append("visible")
-    if "visible" in layer and not layer["visible"]:
-        extra_fields["roles"].append("invisible")
-
-    match layer["protocol"]:
-        case "wms":
-            # handle wms special config options
-            extra_fields["wms:layers"] = layer["layers"]
-            if "styles" in layer:
-                extra_fields["wms:styles"] = layer["styles"]
-            # TODO: handle wms dimensions extra_fields["wms:dimensions"]
-        case "wmts":
-            extra_fields["wmts:layer"] = layer["layer"]
-            # TODO: handle wmts dimensions extra_fields["wmts:dimensions"]
-
-    wml = Link(
-        rel=layer["protocol"],
-        target=layer["url"],
-        media_type="image/png" if "media_type" not in layer else layer["media_type"],
-        title=layer["name"],
-        extra_fields=extra_fields,
-    )
-    return wml
-
-
-def add_example_info(stac_object, data, endpoint, config):
-    if "Services" in data:
-        for service in data["Services"]:
-            if service["Name"] == "Statistical API":
-                service_type = "byoc" if "Type" not in service else service["Type"]
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target="%s/%s" % (config["assets_endpoint"], service["Script"]),
-                        title="evalscript",
-                        media_type="application/javascript",
-                        extra_fields={
-                            "example:language": "JavaScript",
-                            "dataId": "%s-%s" % (service_type, service["CollectionId"]),
-                        },
-                    )
-                )
-            if service["Name"] == "VEDA Statistics":
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target=service["Endpoint"],
-                        title=service["Name"],
-                        media_type="application/json",
-                        extra_fields={
-                            "example:language": "JSON",
-                        },
-                    )
-                )
-            if service["Name"] == "EOxHub Notebook":
-                # TODO: we need to consider if we can improve information added
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target=service["Url"],
-                        title=(
-                            service["Title"] if "Title" in service else service["Name"]
-                        ),
-                        media_type="application/x-ipynb+json",
-                        extra_fields={
-                            "example:language": "Jupyter Notebook",
-                            "example:container": True,
-                        },
-                    )
-                )
-    elif "Resources" in data:
-        for service in data["Resources"]:
-            if service.get("Name") == "xcube":
-                target_url = "%s/timeseries/%s/%s?aggMethods=median" % (
-                    endpoint["EndPoint"],
-                    endpoint["DatacubeId"],
-                    endpoint["Variable"],
-                )
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target=target_url,
-                        title=service["Name"] + " analytics",
-                        media_type="application/json",
-                        extra_fields={
-                            "example:language": "JSON",
-                            "example:method": "POST",
-                        },
-                    )
-                )
-
-
-def generate_veda_cog_link(endpoint, file_url):
-    bidx = ""
-    if "Bidx" in endpoint:
-        # Check if an array was provided
-        if hasattr(endpoint["Bidx"], "__len__"):
-            for band in endpoint["Bidx"]:
-                bidx = bidx + "&bidx=%s" % (band)
-        else:
-            bidx = "&bidx=%s" % (endpoint["Bidx"])
-
-    colormap = ""
-    if "Colormap" in endpoint:
-        colormap = "&colormap=%s" % (endpoint["Colormap"])
-        # TODO: For now we assume a already urlparsed colormap definition
-        # it could be nice to allow a json and better convert it on the fly
-        # colormap = "&colormap=%s"%(urllib.parse.quote(str(endpoint["Colormap"])))
-
-    colormap_name = ""
-    if "ColormapName" in endpoint:
-        colormap_name = "&colormap_name=%s" % (endpoint["ColormapName"])
-
-    rescale = ""
-    if "Rescale" in endpoint:
-        rescale = "&rescale=%s,%s" % (endpoint["Rescale"][0], endpoint["Rescale"][1])
-
-    if file_url:
-        file_url = "url=%s&" % (file_url)
-    else:
-        file_url = ""
-
-    target_url = (
-        "https://staging-raster.delta-backend.com/cog/tiles/WebMercatorQuad/{z}/{x}/{y}?%sresampling_method=nearest%s%s%s%s"
-        % (
-            file_url,
-            bidx,
-            colormap,
-            colormap_name,
-            rescale,
-        )
-    )
-    return target_url
-
-
-def generate_veda_tiles_link(endpoint, item):
-    collection = "collection=%s" % endpoint["CollectionId"]
-    assets = ""
-    for asset in endpoint["Assets"]:
-        assets += "&assets=%s" % asset
-    color_formula = ""
-    if "ColorFormula" in endpoint:
-        color_formula = "&color_formula=%s" % endpoint["ColorFormula"]
-    no_data = ""
-    if "NoData" in endpoint:
-        no_data = "&no_data=%s" % endpoint["NoData"]
-    if item:
-        item = "&item=%s" % (item)
-    else:
-        item = ""
-    target_url = (
-        "https://staging-raster.delta-backend.com/stac/tiles/WebMercatorQuad/{z}/{x}/{y}?%s%s%s%s%s"
-        % (
-            collection,
-            item,
-            assets,
-            color_formula,
-            no_data,
-        )
-    )
-    return target_url
-
-
-def add_visualization_info(stac_object, data, endpoint, file_url=None, time=None):
-    # add extension reference
-    if endpoint["Name"] == "Sentinel Hub" or endpoint["Name"] == "Sentinel Hub WMS":
-        instanceId = os.getenv("SH_INSTANCE_ID")
-        if "InstanceId" in endpoint:
-            instanceId = endpoint["InstanceId"]
-        extra_fields = {
-            "wms:layers": [endpoint["LayerId"]],
-            "role": ["data"],
-        }
-        if time != None:
-            if endpoint["Name"] == "Sentinel Hub WMS":
-                # SH WMS for public collections needs time interval, we use full day here
-                datetime_object = datetime.strptime(time, "%Y-%m-%d")
-                extra_fields["wms:dimensions"] = {
-                    "TIME": "%s/%s"
-                    % (
-                        datetime_object.isoformat(),
-                        (
-                            datetime_object
-                            + timedelta(days=1)
-                            - timedelta(milliseconds=1)
-                        ).isoformat(),
-                    )
-                }
-            if endpoint["Name"] == "Sentinel Hub":
-                extra_fields["wms:dimensions"] = {"TIME": time}
-        stac_object.add_link(
-            Link(
-                rel="wms",
-                target="https://services.sentinel-hub.com/ogc/wms/%s" % (instanceId),
-                media_type=(
-                    endpoint["MimeType"] if "MimeType" in endpoint else "image/png"
-                ),
-                title=data["Name"],
-                extra_fields=extra_fields,
-            )
-        )
-    # elif resource["Name"] == "GeoDB":
-    #     pass
-    elif endpoint["Name"] == "WMS":
-        extra_fields = {
-            "wms:layers": [endpoint["LayerId"]],
-            "role": ["data"],
-        }
-        if time != None:
-            extra_fields["wms:dimensions"] = {
-                "TIME": time,
-            }
-        if "Styles" in endpoint:
-            extra_fields["wms:styles"] = endpoint["Styles"]
-        media_type = "image/jpeg"
-        if "MediaType" in endpoint:
-            media_type = endpoint["MediaType"]
-        stac_object.add_link(
-            Link(
-                rel="wms",
-                target=endpoint["EndPoint"],
-                media_type=media_type,
-                title=data["Name"],
-                extra_fields=extra_fields,
-            )
-        )
-    elif endpoint["Name"] == "JAXA_WMTS_PALSAR":
-        target_url = "%s" % (endpoint.get("EndPoint"),)
-        # custom time just for this special case as a default for collection wmts
-        extra_fields = {
-            "wmts:layer": endpoint.get("LayerId").replace("{time}", time or "2017")
-        }
-        stac_object.add_link(
-            Link(
-                rel="wmts",
-                target=target_url,
-                media_type="image/png",
-                title="wmts capabilities",
-                extra_fields=extra_fields,
-            )
-        )
-    elif endpoint["Name"] == "xcube":
-        if endpoint["Type"] == "zarr":
-            # either preset ColormapName of left as a template
-            cbar = endpoint.get("ColormapName", "{cbar}")
-            # either preset Rescale of left as a template
-            vmin = "{vmin}"
-            vmax = "{vmax}"
-            if "Rescale" in endpoint:
-                vmin = endpoint["Rescale"][0]
-                vmax = endpoint["Rescale"][1]
-            crs = endpoint.get("Crs", "EPSG:3857")
-            target_url = (
-                "%s/tiles/%s/%s/{z}/{y}/{x}?crs=%s&time={time}&vmin=%s&vmax=%s&cbar=%s"
-                % (
-                    endpoint["EndPoint"],
-                    endpoint["DatacubeId"],
-                    endpoint["Variable"],
-                    crs,
-                    vmin,
-                    vmax,
-                    cbar,
-                )
-            )
-            stac_object.add_link(
-                Link(
-                    rel="xyz",
-                    target=target_url,
-                    media_type="image/png",
-                    title="xcube tiles",
-                )
-            )
-        elif endpoint["Type"] == "WMTSCapabilities":
-            target_url = "%s" % (endpoint.get("EndPoint"),)
-            extra_fields = {
-                "wmts:layer": endpoint.get("LayerId"),
-                "role": ["data"],
-            }
-            dimensions = {}
-            if time != None:
-                dimensions["time"] = time
-            if dimensions_config := endpoint.get("Dimensions", {}):
-                for key, value in dimensions_config.items():
-                    dimensions[key] = value
-            if dimensions != {}:
-                extra_fields["wmts:dimensions"] = dimensions
-            stac_object.add_link(
-                Link(
-                    rel="wmts",
-                    target=target_url,
-                    media_type="image/png",
-                    title="wmts capabilities",
-                    extra_fields=extra_fields,
-                )
-            )
-    elif endpoint["Name"] == "VEDA":
-        if endpoint["Type"] == "cog":
-            target_url = generate_veda_cog_link(endpoint, file_url)
-        elif endpoint["Type"] == "tiles":
-            target_url = generate_veda_tiles_link(endpoint, file_url)
-        if target_url:
-            stac_object.add_link(
-                Link(
-                    rel="xyz",
-                    target=target_url,
-                    media_type="image/png",
-                    title=data["Name"],
-                )
-            )
-    elif endpoint["Name"] == "GeoDB Vector Tiles":
-        # `${geoserverUrl}${config.layerName}@EPSG%3A${projString}@pbf/{z}/{x}/{-y}.pbf`,
-        # 'geodb_debd884d-92f9-4979-87b6-eadef1139394:GTIF_AT_Gemeinden_3857'
-        target_url = "%s%s:%s_%s@EPSG:3857@pbf/{z}/{x}/{-y}.pbf" % (
-            endpoint["EndPoint"],
-            endpoint["Instance"],
-            endpoint["Database"],
-            endpoint["CollectionId"],
-        )
-        stac_object.add_link(
-            Link(
-                rel="xyz",
-                target=target_url,
-                media_type="application/pbf",
-                title=data["Name"],
-                extra_fields={
-                    "description": data["Title"],
-                    "parameters": endpoint["Parameters"],
-                    "matchKey": endpoint["MatchKey"],
-                    "timeKey": endpoint["TimeKey"],
-                    "source": endpoint["Source"],
-                    "role": ["data"],
-                },
-            )
-        )
-    else:
-        print("Visualization endpoint not supported")
-
-
-def process_STACAPI_Endpoint(
-    config,
-    endpoint,
-    data,
-    catalog,
-    options,
-    headers={},
-    bbox=None,
-    root_collection=None,
-    filter_dates=None,
-):
-    collection, _ = get_or_create_collection(
-        catalog, endpoint["CollectionId"], data, config, endpoint
-    )
-    # add_visualization_info(collection, data, endpoint)
-
-    api = Client.open(endpoint["EndPoint"], headers=headers)
-    if bbox == None:
-        bbox = "-180,-90,180,90"
-    results = api.search(
-        collections=[endpoint["CollectionId"]],
-        bbox=bbox,
-        datetime=["1900-01-01T00:00:00Z", "3000-01-01T00:00:00Z"],
-    )
-    # We keep track of potential duplicate times in this list
-    added_times = {}
-    for item in results.items():
-        item_datetime = item.get_datetime()
-        if item_datetime != None:
-            iso_date = item_datetime.isoformat()[:10]
-            # if filterdates has been specified skip dates not listed in config
-            if filter_dates and iso_date not in filter_dates:
-                continue
-            if iso_date in added_times:
-                continue
-            added_times[iso_date] = True
-        link = collection.add_item(item)
-        if options.tn:
-            if "cog_default" in item.assets:
-                generate_thumbnail(
-                    item, data, endpoint, item.assets["cog_default"].href
-                )
-            else:
-                generate_thumbnail(item, data, endpoint)
-        # Check if we can create visualization link
-        if "Assets" in endpoint:
-            add_visualization_info(item, data, endpoint, item.id)
-            link.extra_fields["item"] = item.id
-        elif "cog_default" in item.assets:
-            add_visualization_info(
-                item, data, endpoint, item.assets["cog_default"].href
-            )
-            link.extra_fields["cog_href"] = item.assets["cog_default"].href
-        elif item_datetime:
-            time_string = item_datetime.isoformat()[:-6] + "Z"
-            add_visualization_info(item, data, endpoint, time=time_string)
-        elif "start_datetime" in item.properties and "end_datetime" in item.properties:
-            add_visualization_info(
-                item,
-                data,
-                endpoint,
-                time="%s/%s"
-                % (item.properties["start_datetime"], item.properties["end_datetime"]),
-            )
-        # If a root collection exists we point back to it from the item
-        if root_collection != None:
-            item.set_collection(root_collection)
-
-        # bubble up information we want to the link
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            iso_time = item_datetime.isoformat()[:-6] + "Z"
-            if endpoint["Name"] == "Sentinel Hub":
-                # for SH WMS we only save the date (no time)
-                link.extra_fields["datetime"] = iso_date
-            else:
-                link.extra_fields["datetime"] = iso_time
-        else:
-            link.extra_fields["start_datetime"] = item.properties["start_datetime"]
-            link.extra_fields["end_datetime"] = item.properties["end_datetime"]
-
-    collection.update_extent_from_items()
-
-    # replace SH identifier with catalog identifier
-    collection.id = data["Name"]
-    add_collection_information(config, collection, data)
-
-    # Check if we need to overwrite the bbox after update from items
-    if "OverwriteBBox" in endpoint:
-        collection.extent.spatial = SpatialExtent(
-            [
-                endpoint["OverwriteBBox"],
-            ]
-        )
-
-    return collection
-
-
-def fetch_and_save_thumbnail(data, url):
-    collection_path = "../thumbnails/%s_%s/" % (data["EodashIdentifier"], data["Name"])
-    Path(collection_path).mkdir(parents=True, exist_ok=True)
-    image_path = "%s/thumbnail.png" % (collection_path)
-    if not os.path.exists(image_path):
-        data = requests.get(url).content
-        f = open(image_path, "wb")
-        f.write(data)
-        f.close()
-
-
-def generate_thumbnail(
-    stac_object, data, endpoint, file_url=None, time=None, styles=None
-):
-    if endpoint["Name"] == "Sentinel Hub" or endpoint["Name"] == "WMS":
-        instanceId = os.getenv("SH_INSTANCE_ID")
-        if "InstanceId" in endpoint:
-            instanceId = endpoint["InstanceId"]
-        # Build example url
-        wms_config = "REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image/png&STYLES=&TRANSPARENT=true"
-        bbox = "%s,%s,%s,%s" % (
-            stac_object.bbox[1],
-            stac_object.bbox[0],
-            stac_object.bbox[3],
-            stac_object.bbox[2],
-        )
-        output_format = (
-            "format=image/png&WIDTH=256&HEIGHT=128&CRS=EPSG:4326&BBOX=%s" % (bbox)
-        )
-        item_datetime = stac_object.get_datetime()
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            time = item_datetime.isoformat()[:-6] + "Z"
-        url = "https://services.sentinel-hub.com/ogc/wms/%s?%s&layers=%s&time=%s&%s" % (
-            instanceId,
-            wms_config,
-            endpoint["LayerId"],
-            time,
-            output_format,
-        )
-        fetch_and_save_thumbnail(data, url)
-    elif endpoint["Name"] == "VEDA":
-        target_url = generate_veda_cog_link(endpoint, file_url)
-        # set to get 0/0/0 tile
-        url = re.sub(r"\{.\}", "0", target_url)
-        fetch_and_save_thumbnail(data, url)
-
-
-def process_STAC_Datacube_Endpoint(config, endpoint, data, catalog):
-    collection, _ = get_or_create_collection(
-        catalog, data["Name"], data, config, endpoint
-    )
-    add_visualization_info(collection, data, endpoint)
-
-    stac_endpoint_url = endpoint["EndPoint"]
-    if endpoint.get("Name") == "xcube":
-        stac_endpoint_url = stac_endpoint_url + endpoint.get("StacEndpoint", "")
-    # assuming /search not implemented
-    api = Client.open(stac_endpoint_url)
-    coll = api.get_collection(endpoint.get("CollectionId", "datacubes"))
-    item = coll.get_item(endpoint.get("DatacubeId"))
-    # slice a datacube along temporal axis to individual items, selectively adding properties
-    dimensions = item.properties.get("cube:dimensions", {})
-    variables = item.properties.get("cube:variables")
-    if not endpoint.get("Variable") in variables.keys():
-        raise Exception(
-            f'Variable {endpoint.get("Variable")} not found in datacube {variables}'
-        )
-    time_dimension = "time"
-    for k, v in dimensions.items():
-        if v.get("type") == "temporal":
-            time_dimension = k
-            break
-    time_entries = dimensions.get(time_dimension).get("values")
-    for t in time_entries:
-        item = Item(
-            id=t,
-            bbox=item.bbox,
-            properties={},
-            geometry=item.geometry,
-            datetime=parser.isoparse(t),
-        )
-        link = collection.add_item(item)
-        link.extra_fields["datetime"] = t
-        # bubble up information we want to the link
-        item_datetime = item.get_datetime()
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            link.extra_fields["datetime"] = item_datetime.isoformat()[:-6] + "Z"
-        else:
-            link.extra_fields["start_datetime"] = item.properties["start_datetime"]
-            link.extra_fields["end_datetime"] = item.properties["end_datetime"]
-    unit = variables.get(endpoint.get("Variable")).get("unit")
-    if unit and "yAxis" not in data:
-        data["yAxis"] = unit
-    collection.update_extent_from_items()
-
-    add_collection_information(config, collection, data)
-
-    return collection
-
-
-def add_collection_information(config, collection, data):
-    # Add metadata information
-    # Check license identifier
-    if "License" in data:
-        # Check if list was provided
-        if isinstance(data["License"], list):
-            if len(data["License"]) == 1:
-                collection.license = "proprietary"
-                link = Link(
-                    rel="license",
-                    target=data["License"][0]["Url"],
-                    media_type=(
-                        data["License"][0]["Type"]
-                        if "Type" in data["License"][0]
-                        else "text/html"
-                    ),
-                )
-                if "Title" in data["License"][0]:
-                    link.title = data["License"][0]["Title"]
-                collection.links.append(link)
-            elif len(data["License"]) > 1:
-                collection.license = "various"
-                for l in data["License"]:
-                    link = Link(
-                        rel="license",
-                        target=l["Url"],
-                        media_type="text/html" if "Type" in l else l["Type"],
-                    )
-                    if "Title" in l:
-                        link.title = l["Title"]
-                    collection.links.append(link)
-        else:
-            license = lookup.by_id(data["License"])
-            if license is not None:
-                collection.license = license.id
-                if license.sources:
-                    # add links to licenses
-                    for source in license.sources:
-                        collection.links.append(
-                            Link(
-                                rel="license",
-                                target=source,
-                                media_type="text/html",
-                            )
-                        )
-            else:
-                # fallback to proprietary
-                print(
-                    "WARNING: License could not be parsed, falling back to proprietary"
-                )
-                collection.license = "proprietary"
-    else:
-        # print("WARNING: No license was provided, falling back to proprietary")
-        pass
-
-    if "Provider" in data:
-        try:
-            collection.providers = [
-                Provider(
-                    # convert information to lower case
-                    **dict((k.lower(), v) for k, v in provider.items())
-                )
-                for provider in data["Provider"]
-            ]
-        except:
-            print(
-                "WARNING: Issue creating provider information for collection: %s"
-                % collection.id
-            )
-
-    if "Citation" in data:
-        if "DOI" in data["Citation"]:
-            collection.extra_fields["sci:doi"] = data["Citation"]["DOI"]
-        if "Citation" in data["Citation"]:
-            collection.extra_fields["sci:citation"] = data["Citation"]["Citation"]
-        if "Publication" in data["Citation"]:
-            collection.extra_fields["sci:publications"] = [
-                # convert keys to lower case
-                dict((k.lower(), v) for k, v in publication.items())
-                for publication in data["Citation"]["Publication"]
-            ]
-
-    if "Subtitle" in data:
-        collection.extra_fields["subtitle"] = data["Subtitle"]
-    if "Legend" in data:
-        collection.add_asset(
-            "legend",
-            Asset(
-                href="%s/%s" % (config["assets_endpoint"], data["Legend"]),
-                media_type="image/png",
-                roles=["metadata"],
-            ),
-        )
-    if "Story" in data:
-        collection.add_asset(
-            "story",
-            Asset(
-                href="%s/%s" % (config["assets_endpoint"], data["Story"]),
-                media_type="text/markdown",
-                roles=["metadata"],
-            ),
-        )
-    if "Image" in data:
-        collection.add_asset(
-            "thumbnail",
-            Asset(
-                href="%s/%s" % (config["assets_endpoint"], data["Image"]),
-                media_type="image/png",
-                roles=["thumbnail"],
-            ),
-        )
-    # Add extra fields to collection if available
-    add_extra_fields(collection, data)
-
-    if "References" in data:
-        generic_counter = 1
-        for ref in data["References"]:
-            if "Key" in ref:
-                key = ref["Key"]
-            else:
-                key = "reference_%s" % generic_counter
-                generic_counter = generic_counter + 1
-            collection.add_asset(
-                key,
-                Asset(
-                    href=ref["Url"],
-                    title=ref["Name"],
-                    media_type=ref["MediaType"] if "MediaType" in ref else "text/html",
-                    roles=["metadata"],
-                ),
-            )
-
-
-@dataclass
-class Options:
-    catalogspath: str
-    collectionspath: str
-    indicatorspath: str
-    outputpath: str
-    vd: bool
-    ni: bool
-    tn: bool
-    collections: List[str]
-
-
 @click.command()
 @click.option(
     "--catalog",
@@ -1539,9 +387,7 @@ class Options:
     is_flag=True,
     help="validation flag, if set, validation will be run on generated catalogs",
 )
-@click.option(
-    "-ni", is_flag=True, help="no items flag, if set, items will not be saved"
-)
+@click.option("-ni", is_flag=True, help="no items flag, if set, items will not be saved")
 @click.option(
     "-tn",
     is_flag=True,
@@ -1577,14 +423,11 @@ def process_catalogs(
     )
     tasks = []
     for file_name in os.listdir(catalogspath):
-        file_path = os.path.join(catalogspath, file_name)
-        if os.path.isfile(file_path):
-            if catalog == None or os.path.splitext(file_name)[0] == catalog:
-                tasks.append(
-                    RaisingThread(
-                        target=process_catalog_file, args=(file_path, options)
-                    )
-                )
-                tasks[-1].start()
+        file_path = f"{catalogspath}/{file_name}"
+        if os.path.isfile(file_path) and (
+            catalog is None or os.path.splitext(file_name)[0] == catalog
+        ):
+            tasks.append(RaisingThread(target=process_catalog_file, args=(file_path, options)))
+            tasks[-1].start()
     for task in tasks:
         task.join()
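
For completeness, a sketch of exercising the click command shown above via click's test runner. The import path eodash_catalog.generate_indicators is an assumption (the wheel diff does not show the file name being changed); the flags are the ones visible in this diff, with -ni skipping item saving and -vd enabling validation of the generated catalogs:

# Assumed module path; adjust to wherever process_catalogs lives in the wheel.
from click.testing import CliRunner

from eodash_catalog.generate_indicators import process_catalogs

runner = CliRunner()
# build only the catalog definition named "demo", skip items, then validate
result = runner.invoke(process_catalogs, ["--catalog", "demo", "-ni", "-vd"])
print(result.output)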