eodash_catalog 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of eodash_catalog might be problematic.

@@ -3,74 +3,70 @@
 Indicator generator to harvest information from endpoints and generate catalog
 
 """
+
+import os
 import time
-import requests
-import json
-from pystac_client import Client
 from dataclasses import dataclass
-from typing import List
-import os
-import re
-from pathlib import Path
-from datetime import datetime, timedelta
-from dotenv import load_dotenv
+
+import click
 import yaml
-from yaml.loader import SafeLoader
-from itertools import groupby
-from operator import itemgetter
-from dateutil import parser
-from eodash_catalog.sh_endpoint import get_SH_token
-from eodash_catalog.utils import (
-    create_geojson_point,
-    retrieveExtentFromWMSWMTS,
-    generateDateIsostringsFromInterval,
-    RaisingThread,
-)
+from dotenv import load_dotenv
 from pystac import (
-    Item,
-    Asset,
     Catalog,
-    Link,
     CatalogType,
     Collection,
-    Extent,
-    SpatialExtent,
-    TemporalExtent,
     Summaries,
-    Provider,
 )
 from pystac.layout import TemplateLayoutStrategy
 from pystac.validation import validate_all
-import spdx_lookup as lookup
-import argparse
-
-import click
+from yaml.loader import SafeLoader
 
+from eodash_catalog.endpoints import (
+    handle_collection_only,
+    handle_GeoDB_endpoint,
+    handle_GeoDB_Tiles_endpoint,
+    handle_SH_endpoint,
+    handle_SH_WMS_endpoint,
+    handle_VEDA_endpoint,
+    handle_WMS_endpoint,
+    handle_xcube_endpoint,
+)
+from eodash_catalog.stac_handling import (
+    add_base_overlay_info,
+    add_collection_information,
+    add_extra_fields,
+    get_or_create_collection_and_times,
+)
+from eodash_catalog.utils import (
+    RaisingThread,
+    iter_len_at_least,
+    recursive_save,
+)
 
 # make sure we are loading the env local definition
 load_dotenv()
 
 
-def recursive_save(stac_object, no_items=False):
-    stac_object.save_object()
-    for child in stac_object.get_children():
-        recursive_save(child, no_items)
-    if not no_items:
-        # try to save items if available
-        for item in stac_object.get_items():
-            item.save_object()
+@dataclass
+class Options:
+    catalogspath: str
+    collectionspath: str
+    indicatorspath: str
+    outputpath: str
+    vd: bool
+    ni: bool
+    tn: bool
+    collections: list[str]
 
 
-def process_catalog_file(file_path, options):
+def process_catalog_file(file_path: str, options: Options):
     print("Processing catalog:", file_path)
     with open(file_path) as f:
-        config = yaml.load(f, Loader=SafeLoader)
+        config: dict = yaml.load(f, Loader=SafeLoader)
 
     if len(options.collections) > 0:
         # create only catalogs containing the passed collections
-        process_collections = [
-            c for c in config["collections"] if c in options.collections
-        ]
+        process_collections = [c for c in config["collections"] if c in options.collections]
     elif (len(options.collections) == 1 and options.collections == "all") or len(
         options.collections
     ) == 0:
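
The import reshuffle above is the core of this release: endpoint handlers move to eodash_catalog.endpoints, collection metadata helpers to eodash_catalog.stac_handling, and utilities such as recursive_save and iter_len_at_least to eodash_catalog.utils, while the run flags are bundled into the Options dataclass that the old version only defined at the bottom of the file. A minimal sketch of driving process_catalog_file with the new dataclass (the module path and YAML paths are illustrative, not taken from this diff):

    from eodash_catalog.generate_indicators import Options, process_catalog_file  # hypothetical module path

    options = Options(
        catalogspath="catalogs",
        collectionspath="collections",
        indicatorspath="indicators",
        outputpath="build",
        vd=False,  # run validation on the generated catalog
        ni=False,  # when True, items are not saved
        tn=False,  # when True, thumbnails are generated
        collections=[],  # empty list processes all collections
    )
    process_catalog_file("catalogs/example.yaml", options)
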
@@ -86,49 +82,51 @@ def process_catalog_file(file_path, options):
         catalog_type=CatalogType.RELATIVE_PUBLISHED,
     )
     for collection in process_collections:
-        file_path = "%s/%s.yaml" % (options.collectionspath, collection)
+        file_path = f"{options.collectionspath}/{collection}.yaml"
         if os.path.isfile(file_path):
             # if collection file exists process it as indicator
             # collection will be added as single collection to indicator
             process_indicator_file(config, file_path, catalog, options)
         else:
             # if not try to see if indicator definition available
-            file_path = "%s/%s.yaml" % (options.indicatorspath, collection)
+            file_path = f"{options.indicatorspath}/{collection}.yaml"
             if os.path.isfile(file_path):
                 process_indicator_file(
                     config,
-                    "%s/%s.yaml" % (options.indicatorspath, collection),
+                    f"{options.indicatorspath}/{collection}.yaml",
                     catalog,
                     options,
                 )
             else:
-                print(
-                    "Warning: neither collection nor indicator found for %s"
-                    % collection
-                )
+                print(f"Warning: neither collection nor indicator found for {collection}")
 
     strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
-    catalog.normalize_hrefs(
-        "%s/%s" % (options.outputpath, config["id"]), strategy=strategy
-    )
+    # expecting that the catalog will be hosted online, self url should correspond to that
+    # default to a local folder + catalog id in case not set
 
     print("Started creation of collection files")
     start = time.time()
     if options.ni:
+        catalog_self_href = f'{options.outputpath}/{config["id"]}'
+        catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
         recursive_save(catalog, options.ni)
     else:
         # For full catalog save with items this still seems to be faster
-        catalog.save(dest_href="%s/%s" % (options.outputpath, config["id"]))
+        catalog_self_href = config.get(
+            "endpoint", "{}/{}".format(options.outputpath, config["id"])
+        )
+        catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
+        catalog.save(dest_href="{}/{}".format(options.outputpath, config["id"]))
     end = time.time()
     print(f"Catalog {config['id']}: Time consumed in saving: {end - start}")
 
     if options.vd:
         # try to validate catalog if flag was set
-        print("Running validation of catalog %s" % file_path)
+        print(f"Running validation of catalog {file_path}")
         try:
             validate_all(catalog.to_dict(), href=config["endpoint"])
         except Exception as e:
-            print("Issue validation collection: %s" % e)
+            print(f"Issue validation collection: {e}")
 
 
 def extract_indicator_info(parent_collection):
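
Besides the f-string cleanups, the functional change in this hunk is href handling: normalize_hrefs now runs with a catalog_self_href that prefers the catalog config's endpoint entry, so a hosted catalog gets self links pointing at its public URL while files are still written to the local output path. A standalone sketch of the same pystac pattern (the config values are illustrative):

    from pystac import Catalog, CatalogType
    from pystac.layout import TemplateLayoutStrategy

    config = {"id": "demo", "endpoint": "https://example.com/stac"}  # illustrative
    outputpath = "build"
    catalog = Catalog(id=config["id"], description="demo catalog")

    strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
    # hosted URL wins; fall back to local folder + catalog id when not set
    catalog_self_href = config.get("endpoint", f"{outputpath}/{config['id']}")
    catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
    # self links resolve against the hosted location, files land locally
    catalog.save(catalog_type=CatalogType.RELATIVE_PUBLISHED, dest_href=f"{outputpath}/{config['id']}")
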
@@ -155,10 +153,9 @@ def extract_indicator_info(parent_collection):
                else:
                    summaries[key].add(param)
            # extract also summary information
-            if collection.summaries.lists:
-                if key in collection.summaries.lists:
-                    for p in collection.summaries.lists[key]:
-                        summaries[key].add(p)
+            if collection.summaries.lists and collection.summaries.lists.get(key):
+                for p in collection.summaries.lists[key]:
+                    summaries[key].add(p)
 
     for key in to_extract:
         # convert all items back to a list
@@ -169,23 +166,19 @@ def extract_indicator_info(parent_collection):
     parent_collection.summaries = Summaries(summaries)
 
 
-def iter_len_at_least(i, n):
-    return sum(1 for _ in zip(range(n), i)) == n
-
-
-def process_indicator_file(config, file_path, catalog, options):
+def process_indicator_file(config: dict, file_path: str, catalog: Catalog, options: Options):
     with open(file_path) as f:
         print("Processing indicator:", file_path)
-        data = yaml.load(f, Loader=SafeLoader)
-        parent_indicator, _ = get_or_create_collection(
-            catalog, data["Name"], data, config
+        data: dict = yaml.load(f, Loader=SafeLoader)
+        parent_indicator, _ = get_or_create_collection_and_times(
+            catalog, data["Name"], data, config, {}
         )
         if "Collections" in data:
             for collection in data["Collections"]:
                 process_collection_file(
                     config,
-                    "%s/%s.yaml" % (options.collectionspath, collection),
-                    catalog,
+                    f"{options.collectionspath}/{collection}.yaml",
+                    parent_indicator,
                     options,
                 )
         else:
@@ -196,9 +189,7 @@ def process_indicator_file(config, file_path, catalog, options):
        parent_indicator.update_extent_from_items()
        # Add bbox extents from children
        for c_child in parent_indicator.get_children():
-            parent_indicator.extent.spatial.bboxes.append(
-                c_child.extent.spatial.bboxes[0]
-            )
+            parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
        # extract collection information and add it to summary indicator level
        extract_indicator_info(parent_indicator)
        # add baselayer and overview information to indicator collection
@@ -206,51 +197,54 @@ def process_indicator_file(config, file_path, catalog, options):
         add_to_catalog(parent_indicator, catalog, None, data)
 
 
-def process_collection_file(config, file_path, catalog, options):
+def process_collection_file(
+    config: dict, file_path: str, catalog: Catalog | Collection, options: Options
+):
     print("Processing collection:", file_path)
     with open(file_path) as f:
-        data = yaml.load(f, Loader=SafeLoader)
+        data: dict = yaml.load(f, Loader=SafeLoader)
         if "Resources" in data:
             for resource in data["Resources"]:
                 if "EndPoint" in resource:
+                    collection = None
                     if resource["Name"] == "Sentinel Hub":
-                        handle_SH_endpoint(config, resource, data, catalog, options)
+                        collection = handle_SH_endpoint(config, resource, data, catalog, options)
                     elif resource["Name"] == "Sentinel Hub WMS":
-                        collection = handle_SH_WMS_endpoint(
-                            config, resource, data, catalog
-                        )
+                        collection = handle_SH_WMS_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "GeoDB":
-                        collection = handle_GeoDB_endpoint(
-                            config, resource, data, catalog
-                        )
-                        add_to_catalog(collection, catalog, resource, data)
+                        collection = handle_GeoDB_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "VEDA":
-                        handle_VEDA_endpoint(config, resource, data, catalog, options)
+                        collection = handle_VEDA_endpoint(config, resource, data, catalog, options)
                     elif resource["Name"] == "marinedatastore":
-                        handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
+                        collection = handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
                     elif resource["Name"] == "xcube":
-                        handle_xcube_endpoint(config, resource, data, catalog)
+                        collection = handle_xcube_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "WMS":
-                        handle_WMS_endpoint(config, resource, data, catalog)
+                        collection = handle_WMS_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "GeoDB Vector Tiles":
-                        handle_GeoDB_Tiles_endpoint(config, resource, data, catalog)
+                        collection = handle_GeoDB_Tiles_endpoint(config, resource, data, catalog)
                     elif resource["Name"] == "JAXA_WMTS_PALSAR":
                         # somewhat one off creation of individual WMTS layers as individual items
-                        handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
+                        collection = handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
                     elif resource["Name"] == "Collection-only":
-                        handle_collection_only(config, resource, data, catalog)
+                        collection = handle_collection_only(config, resource, data, catalog)
                     else:
                         raise ValueError("Type of Resource is not supported")
+                    if collection is not None:
+                        add_to_catalog(collection, catalog, resource, data)
+                    else:
+                        raise Exception("No collection generated")
         elif "Subcollections" in data:
             # if no endpoint is specified we check for definition of subcollections
-            parent_collection, _ = get_or_create_collection(
-                catalog, data["Name"], data, config
+            parent_collection, _ = get_or_create_collection_and_times(
+                catalog, data["Name"], data, config, {}
             )
 
             locations = []
             countries = []
             for sub_coll_def in data["Subcollections"]:
-                # Subcollection has only data on one location which is defined for the entire collection
+                # Subcollection has only data on one location which
+                # is defined for the entire collection
                 if "Name" in sub_coll_def and "Point" in sub_coll_def:
                     locations.append(sub_coll_def["Name"])
                     if isinstance(sub_coll_def["Country"], list):
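
Two behavioural fixes ride along with the formatting changes in this hunk: every handler branch now assigns its return value to collection, and registration happens once after the if/elif chain, where previously handle_SH_endpoint, handle_VEDA_endpoint and others discarded their result and only the GeoDB branch called add_to_catalog. The contract, condensed into a sketch (the real code keeps the explicit elif chain and passes options or wmts to some handlers):

    def dispatch_resource(config, resource, data, catalog):
        # each handler builds and returns a pystac Collection for the resource
        handlers = {
            "WMS": handle_WMS_endpoint,
            "xcube": handle_xcube_endpoint,
            "GeoDB": handle_GeoDB_endpoint,
        }
        handler = handlers.get(resource["Name"])
        if handler is None:
            raise ValueError("Type of Resource is not supported")
        collection = handler(config, resource, data, catalog)
        if collection is None:
            raise Exception("No collection generated")
        # single registration point instead of per-branch add_to_catalog calls
        return add_to_catalog(collection, catalog, resource, data)
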
@@ -259,8 +253,7 @@ def process_collection_file(config, file_path, catalog, options):
                        countries.append(sub_coll_def["Country"])
                    process_collection_file(
                        config,
-                        "%s/%s.yaml"
-                        % (options.collectionspath, sub_coll_def["Collection"]),
+                        "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
                        parent_collection,
                        options,
                    )
@@ -271,7 +264,7 @@
                            and "id" in link.extra_fields
                            and link.extra_fields["id"] == sub_coll_def["Identifier"]
                        ):
-                            latlng = "%s,%s" % (
+                            latlng = "{},{}".format(
                                sub_coll_def["Point"][1],
                                sub_coll_def["Point"][0],
                            )
@@ -279,27 +272,21 @@
                            link.extra_fields["latlng"] = latlng
                            link.extra_fields["name"] = sub_coll_def["Name"]
                    # Update title of collection to use location name
-                    sub_collection = parent_collection.get_child(
-                        id=sub_coll_def["Identifier"]
-                    )
+                    sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
                    if sub_collection:
                        sub_collection.title = sub_coll_def["Name"]
-                # The subcollection has multiple locations which need to be extracted and elevated to parent collection level
+                # The subcollection has multiple locations which need to be extracted
+                # and elevated to parent collection level
                else:
                    # create temp catalog to save collection
-                    tmp_catalog = Catalog(
-                        id="tmp_catalog", description="temp catalog placeholder"
-                    )
+                    tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
                    process_collection_file(
                        config,
-                        "%s/%s.yaml"
-                        % (options.collectionspath, sub_coll_def["Collection"]),
+                        "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
                        tmp_catalog,
                        options,
                    )
-                    links = tmp_catalog.get_child(
-                        sub_coll_def["Identifier"]
-                    ).get_links()
+                    links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links()  # type: ignore
                    for link in links:
                        # extract summary information
                        if "city" in link.extra_fields:
@@ -316,9 +303,7 @@
            parent_collection.update_extent_from_items()
            # Add bbox extents from children
            for c_child in parent_collection.get_children():
-                parent_collection.extent.spatial.bboxes.append(
-                    c_child.extent.spatial.bboxes[0]
-                )
+                parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
            # Fill summaries for locations
            parent_collection.summaries = Summaries(
                {
@@ -329,221 +314,6 @@
             add_to_catalog(parent_collection, catalog, None, data)
 
 
-def handle_collection_only(config, endpoint, data, catalog):
-    collection, times = get_or_create_collection(
-        catalog, data["Name"], data, config, endpoint
-    )
-    if len(times) > 0 and not endpoint.get("Disable_Items"):
-        for t in times:
-            item = Item(
-                id=t,
-                bbox=endpoint.get("OverwriteBBox"),
-                properties={},
-                geometry=None,
-                datetime=parser.isoparse(t),
-            )
-            link = collection.add_item(item)
-            link.extra_fields["datetime"] = t
-    add_collection_information(config, collection, data)
-    add_to_catalog(collection, catalog, None, data)
-
-
-def handle_WMS_endpoint(config, endpoint, data, catalog, wmts=False):
-    collection, times = get_or_create_collection(
-        catalog, data["Name"], data, config, endpoint
-    )
-    spatial_extent = collection.extent.spatial.to_dict().get(
-        "bbox", [-180, -90, 180, 90]
-    )[0]
-    if not endpoint.get("Type") == "OverwriteTimes" or not endpoint.get(
-        "OverwriteBBox"
-    ):
-        # some endpoints allow "narrowed-down" capabilities per-layer, which we utilize to not
-        # have to process full service capabilities XML
-        capabilities_url = endpoint["EndPoint"]
-        spatial_extent, times = retrieveExtentFromWMSWMTS(
-            capabilities_url, endpoint["LayerId"],
-            version=endpoint.get('Version', '1.1.1'),
-            wmts=wmts,
-        )
-    # Create an item per time to allow visualization in stac clients
-    if len(times) > 0 and not endpoint.get("Disable_Items"):
-        for t in times:
-            item = Item(
-                id=t,
-                bbox=spatial_extent,
-                properties={},
-                geometry=None,
-                datetime=parser.isoparse(t),
-                stac_extensions=[
-                    "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
-                ],
-            )
-            add_visualization_info(item, data, endpoint, time=t)
-            link = collection.add_item(item)
-            link.extra_fields["datetime"] = t
-        collection.update_extent_from_items()
-
-    # Check if we should overwrite bbox
-    if "OverwriteBBox" in endpoint:
-        collection.extent.spatial = SpatialExtent(
-            [
-                endpoint["OverwriteBBox"],
-            ]
-        )
-    add_collection_information(config, collection, data)
-    add_to_catalog(collection, catalog, endpoint, data)
-
-
-def handle_SH_endpoint(config, endpoint, data, catalog, options):
-    token = get_SH_token()
-    headers = {"Authorization": "Bearer %s" % token}
-    endpoint["EndPoint"] = "https://services.sentinel-hub.com/api/v1/catalog/1.0.0/"
-    # Overwrite collection id with type, such as ZARR or BYOC
-    if "Type" in endpoint:
-        endpoint["CollectionId"] = endpoint["Type"] + "-" + endpoint["CollectionId"]
-    handle_STAC_based_endpoint(config, endpoint, data, catalog, options, headers)
-
-
-def handle_SH_WMS_endpoint(config, endpoint, data, catalog):
-    # create collection and subcollections (based on locations)
-    if "Locations" in data:
-        root_collection, _ = get_or_create_collection(
-            catalog, data["Name"], data, config, endpoint
-        )
-        for location in data["Locations"]:
-            # create and populate location collections based on times
-            # TODO: Should we add some new description per location?
-            location_config = {
-                "Title": location["Name"],
-                "Description": "",
-            }
-            collection, _ = get_or_create_collection(
-                catalog, location["Identifier"], location_config, config, endpoint
-            )
-            collection.extra_fields["endpointtype"] = endpoint["Name"]
-            for time in location["Times"]:
-                item = Item(
-                    id=time,
-                    bbox=location["Bbox"],
-                    properties={},
-                    geometry=None,
-                    datetime=parser.isoparse(time),
-                    stac_extensions=[
-                        "https://stac-extensions.github.io/web-map-links/v1.1.0/schema.json",
-                    ],
-                )
-                add_visualization_info(item, data, endpoint, time=time)
-                item_link = collection.add_item(item)
-                item_link.extra_fields["datetime"] = time
-
-            link = root_collection.add_child(collection)
-            # bubble up information we want to the link
-            latlng = "%s,%s" % (location["Point"][1], location["Point"][0])
-            link.extra_fields["id"] = location["Identifier"]
-            link.extra_fields["latlng"] = latlng
-            link.extra_fields["country"] = location["Country"]
-            link.extra_fields["city"] = location["Name"]
-            collection.update_extent_from_items()
-            add_visualization_info(collection, data, endpoint)
-
-        root_collection.update_extent_from_items()
-        # Add bbox extents from children
-        for c_child in root_collection.get_children():
-            root_collection.extent.spatial.bboxes.append(
-                c_child.extent.spatial.bboxes[0]
-            )
-    add_to_catalog(root_collection, catalog, endpoint, data)
-    return root_collection
-
-
-def handle_VEDA_endpoint(config, endpoint, data, catalog, options):
-    handle_STAC_based_endpoint(config, endpoint, data, catalog, options)
-
-
-def handle_xcube_endpoint(config, endpoint, data, catalog):
-    root_collection = process_STAC_Datacube_Endpoint(
-        config=config,
-        endpoint=endpoint,
-        data=data,
-        catalog=catalog,
-    )
-
-    add_example_info(root_collection, data, endpoint, config)
-    add_to_catalog(root_collection, catalog, endpoint, data)
-
-
-def get_or_create_collection(catalog, collection_id, data, config, endpoint=None):
-    # Check if collection already in catalog
-    for collection in catalog.get_collections():
-        if collection.id == collection_id:
-            return collection, []
-    # If none found create a new one
-    spatial_extent = [-180.0, -90.0, 180.0, 90.0]
-    if endpoint and endpoint.get("OverwriteBBox"):
-        spatial_extent = endpoint.get("OverwriteBBox")
-    spatial_extent = SpatialExtent(
-        [
-            spatial_extent,
-        ]
-    )
-    times = []
-    temporal_extent = TemporalExtent([[datetime.now(), None]])
-    if endpoint and endpoint.get("Type") == "OverwriteTimes":
-        if endpoint.get("Times"):
-            times = endpoint.get("Times")
-            times_datetimes = sorted([parser.isoparse(time) for time in times])
-            temporal_extent = TemporalExtent(
-                [[times_datetimes[0], times_datetimes[-1]]]
-            )
-        elif endpoint.get("DateTimeInterval"):
-            start = endpoint["DateTimeInterval"].get("Start", "2020-09-01T00:00:00")
-            end = endpoint["DateTimeInterval"].get("End", "2020-10-01T00:00:00")
-            timedelta_config = endpoint["DateTimeInterval"].get(
-                "Timedelta", {"days": 1}
-            )
-            times = generateDateIsostringsFromInterval(start, end, timedelta_config)
-            times_datetimes = sorted([parser.isoparse(time) for time in times])
-            temporal_extent = TemporalExtent(
-                [[times_datetimes[0], times_datetimes[-1]]]
-            )
-    extent = Extent(spatial=spatial_extent, temporal=temporal_extent)
-
-    # Check if description is link to markdown file
-    if "Description" in data:
-        description = data["Description"]
-        if description.endswith((".md", ".MD")):
-            if description.startswith(("http")):
-                # if full absolute path is defined
-                response = requests.get(description)
-                if response.status_code == 200:
-                    description = response.text
-                elif "Subtitle" in data:
-                    print("WARNING: Markdown file could not be fetched")
-                    description = data["Subtitle"]
-            else:
-                # relative path to assets was given
-                response = requests.get(
-                    "%s/%s" % (config["assets_endpoint"], description)
-                )
-                if response.status_code == 200:
-                    description = response.text
-                elif "Subtitle" in data:
-                    print("WARNING: Markdown file could not be fetched")
-                    description = data["Subtitle"]
-    elif "Subtitle" in data:
-        # Try to use at least subtitle to fill some information
-        description = data["Subtitle"]
-
-    collection = Collection(
-        id=collection_id,
-        title=data["Title"],
-        description=description,
-        extent=extent,
-    )
-    return (collection, times)
-
-
 def add_to_catalog(collection, catalog, endpoint, data):
     # check if already in catalog, if it is do not re-add it
     # TODO: probably we should add to the catalog only when creating
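
The handlers and get_or_create_collection removed above are not gone: per the new imports at the top of the file they now live in eodash_catalog.endpoints and eodash_catalog.stac_handling, with get_or_create_collection renamed to get_or_create_collection_and_times and taking an explicit endpoint dict as its fifth argument. Call sites in this file change accordingly, passing an empty dict where no endpoint applies:

    # signature inferred from the call sites in this diff
    parent_indicator, times = get_or_create_collection_and_times(
        catalog, data["Name"], data, config, {}
    )
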
@@ -554,20 +324,17 @@ def add_to_catalog(collection, catalog, endpoint, data):
     link = catalog.add_child(collection)
     # bubble fields we want to have up to collection link and add them to collection
     if endpoint and "Type" in endpoint:
-        collection.extra_fields["endpointtype"] = "%s_%s" % (
+        collection.extra_fields["endpointtype"] = "{}_{}".format(
             endpoint["Name"],
             endpoint["Type"],
         )
-        link.extra_fields["endpointtype"] = "%s_%s" % (
+        link.extra_fields["endpointtype"] = "{}_{}".format(
             endpoint["Name"],
             endpoint["Type"],
         )
     elif endpoint:
         collection.extra_fields["endpointtype"] = endpoint["Name"]
         link.extra_fields["endpointtype"] = endpoint["Name"]
-    # Disabling bubbling up of description as now it is considered to be
-    # used as markdown loading would increase the catalog size unnecessarily
-    # link.extra_fields["description"] = collection.description
     if "Subtitle" in data:
         link.extra_fields["subtitle"] = data["Subtitle"]
     link.extra_fields["title"] = collection.title
@@ -577,934 +344,13 @@ def add_to_catalog(collection, catalog, endpoint, data):
         link.extra_fields["themes"] = data["Themes"]
     # Check for summaries and bubble up info
     if collection.summaries.lists:
-        for sum in collection.summaries.lists:
-            link.extra_fields[sum] = collection.summaries.lists[sum]
+        for summary in collection.summaries.lists:
+            link.extra_fields[summary] = collection.summaries.lists[summary]
 
     add_extra_fields(link, data)
     return link
 
 
-def add_extra_fields(stac_object, data):
-    if "yAxis" in data:
-        stac_object.extra_fields["yAxis"] = data["yAxis"]
-    if "Themes" in data:
-        stac_object.extra_fields["themes"] = data["Themes"]
-    if "Locations" in data or "Subcollections" in data:
-        stac_object.extra_fields["locations"] = True
-    if "Tags" in data:
-        stac_object.extra_fields["tags"] = data["Tags"]
-    if "Satellite" in data:
-        stac_object.extra_fields["satellite"] = data["Satellite"]
-    if "Sensor" in data:
-        stac_object.extra_fields["sensor"] = data["Sensor"]
-    if "Agency" in data:
-        stac_object.extra_fields["agency"] = data["Agency"]
-    if "yAxis" in data:
-        stac_object.extra_fields["yAxis"] = data["yAxis"]
-    if "EodashIdentifier" in data:
-        stac_object.extra_fields["subcode"] = data["EodashIdentifier"]
-    if "DataSource" in data:
-        if "Spaceborne" in data["DataSource"]:
-            if "Sensor" in data["DataSource"]["Spaceborne"]:
-                stac_object.extra_fields["sensor"] = data["DataSource"]["Spaceborne"][
-                    "Sensor"
-                ]
-            if "Satellite" in data["DataSource"]["Spaceborne"]:
-                stac_object.extra_fields["satellite"] = data["DataSource"][
-                    "Spaceborne"
-                ]["Satellite"]
-        if "InSitu" in data["DataSource"]:
-            stac_object.extra_fields["insituSources"] = data["DataSource"]["InSitu"]
-        if "Other" in data["DataSource"]:
-            stac_object.extra_fields["otherSources"] = data["DataSource"]["Other"]
-
-
-def handle_GeoDB_endpoint(config, endpoint, data, catalog):
-    collection, _ = get_or_create_collection(
-        catalog, endpoint["CollectionId"], data, config, endpoint
-    )
-    select = "?select=aoi,aoi_id,country,city,time"
-    url = (
-        endpoint["EndPoint"]
-        + endpoint["Database"]
-        + "_%s" % endpoint["CollectionId"]
-        + select
-    )
-    if additional_query_parameters := endpoint.get("AdditionalQueryString"):
-        url += f"&{additional_query_parameters}"
-    response = json.loads(requests.get(url).text)
-
-    # Sort locations by key
-    sorted_locations = sorted(response, key=itemgetter("aoi_id"))
-    cities = []
-    countries = []
-    for key, value in groupby(sorted_locations, key=itemgetter("aoi_id")):
-        # Finding min and max values for date
-        values = [v for v in value]
-        times = [datetime.fromisoformat(t["time"]) for t in values]
-        unique_values = list({v["aoi_id"]: v for v in values}.values())[0]
-        country = unique_values["country"]
-        city = unique_values["city"]
-        IdKey = endpoint.get("IdKey", "city")
-        IdValue = unique_values[IdKey]
-        if country not in countries:
-            countries.append(country)
-        # sanitize unique key identifier to be sure it is saveable as a filename
-        if IdValue is not None:
-            IdValue = "".join(
-                [c for c in IdValue if c.isalpha() or c.isdigit() or c == " "]
-            ).rstrip()
-        # Additional check to see if unique key name is empty afterwards
-        if IdValue == "" or IdValue is None:
-            # use aoi_id as a fallback unique id instead of configured key
-            IdValue = key
-        if city not in cities:
-            cities.append(city)
-        min_date = min(times)
-        max_date = max(times)
-        latlon = unique_values["aoi"]
-        [lat, lon] = [float(x) for x in latlon.split(",")]
-        # create item for unique locations
-        buff = 0.01
-        bbox = [lon - buff, lat - buff, lon + buff, lat + buff]
-        item = Item(
-            id=IdValue,
-            bbox=bbox,
-            properties={},
-            geometry=create_geojson_point(lon, lat),
-            datetime=None,
-            start_datetime=min_date,
-            end_datetime=max_date,
-        )
-        link = collection.add_item(item)
-        # bubble up information we want to the link
-        link.extra_fields["id"] = key
-        link.extra_fields["latlng"] = latlon
-        link.extra_fields["country"] = country
-        link.extra_fields["city"] = city
-
-    if "yAxis" not in data:
-        # fetch yAxis and store it to data, preventing need to save it per dataset in yml
-        select = "?select=y_axis&limit=1"
-        url = (
-            endpoint["EndPoint"]
-            + endpoint["Database"]
-            + "_%s" % endpoint["CollectionId"]
-            + select
-        )
-        response = json.loads(requests.get(url).text)
-        yAxis = response[0]["y_axis"]
-        data["yAxis"] = yAxis
-    add_collection_information(config, collection, data)
-    add_example_info(collection, data, endpoint, config)
-
-    collection.update_extent_from_items()
-    collection.summaries = Summaries(
-        {
-            "cities": cities,
-            "countries": countries,
-        }
-    )
-    return collection
-
-
-def handle_STAC_based_endpoint(config, endpoint, data, catalog, options, headers=None):
-    if "Locations" in data:
-        root_collection, _ = get_or_create_collection(
-            catalog, data["Name"], data, config, endpoint
-        )
-        for location in data["Locations"]:
-            if "FilterDates" in location:
-                collection = process_STACAPI_Endpoint(
-                    config=config,
-                    endpoint=endpoint,
-                    data=data,
-                    catalog=catalog,
-                    options=options,
-                    headers=headers,
-                    bbox=",".join(map(str, location["Bbox"])),
-                    filter_dates=location["FilterDates"],
-                    root_collection=root_collection,
-                )
-            else:
-                collection = process_STACAPI_Endpoint(
-                    config=config,
-                    endpoint=endpoint,
-                    data=data,
-                    catalog=catalog,
-                    options=options,
-                    headers=headers,
-                    bbox=",".join(map(str, location["Bbox"])),
-                    root_collection=root_collection,
-                )
-            # Update identifier to use location as well as title
-            # TODO: should we use the name as id? it provides much more
-            # information in the clients
-            collection.id = location["Identifier"]
-            collection.title = (location["Name"],)
-            # See if description should be overwritten
-            if "Description" in location:
-                collection.description = location["Description"]
-            else:
-                collection.description = location["Name"]
-            # TODO: should we remove all assets from sub collections?
-            link = root_collection.add_child(collection)
-            latlng = "%s,%s" % (location["Point"][1], location["Point"][0])
-            # Add extra properties we need
-            link.extra_fields["id"] = location["Identifier"]
-            link.extra_fields["latlng"] = latlng
-            link.extra_fields["name"] = location["Name"]
-            add_example_info(collection, data, endpoint, config)
-            if "OverwriteBBox" in location:
-                collection.extent.spatial = SpatialExtent(
-                    [
-                        location["OverwriteBBox"],
-                    ]
-                )
-        root_collection.update_extent_from_items()
-        # Add bbox extents from children
-        for c_child in root_collection.get_children():
-            root_collection.extent.spatial.bboxes.append(
-                c_child.extent.spatial.bboxes[0]
-            )
-    else:
-        if "Bbox" in endpoint:
-            root_collection = process_STACAPI_Endpoint(
-                config=config,
-                endpoint=endpoint,
-                data=data,
-                catalog=catalog,
-                options=options,
-                headers=headers,
-                bbox=",".join(map(str, endpoint["Bbox"])),
-            )
-        else:
-            root_collection = process_STACAPI_Endpoint(
-                config=config,
-                endpoint=endpoint,
-                data=data,
-                catalog=catalog,
-                options=options,
-                headers=headers,
-            )
-
-    add_example_info(root_collection, data, endpoint, config)
-    add_to_catalog(root_collection, catalog, endpoint, data)
-
-
-def add_base_overlay_info(collection, config, data):
-    # check if default base layers defined
-    if "default_base_layers" in config:
-        with open("%s.yaml" % config["default_base_layers"]) as f:
-            base_layers = yaml.load(f, Loader=SafeLoader)
-            for layer in base_layers:
-                collection.add_link(create_web_map_link(layer, role="baselayer"))
-    # check if default overlay layers defined
-    if "default_overlay_layers" in config:
-        with open("%s.yaml" % config["default_overlay_layers"]) as f:
-            overlay_layers = yaml.load(f, Loader=SafeLoader)
-            for layer in overlay_layers:
-                collection.add_link(create_web_map_link(layer, role="overlay"))
-    if "BaseLayers" in data:
-        for layer in data["BaseLayers"]:
-            collection.add_link(create_web_map_link(layer, role="baselayer"))
-    if "OverlayLayers" in data:
-        for layer in data["OverlayLayers"]:
-            collection.add_link(create_web_map_link(layer, role="overlay"))
-    # TODO: possibility to overwrite default base and overlay layers
-
-
-def create_web_map_link(layer, role):
-    extra_fields = {
-        "roles": [role],
-        "id": layer["id"],
-    }
-    if "default" in layer and layer["default"]:
-        extra_fields["roles"].append("default")
-    if "visible" in layer and layer["visible"]:
-        extra_fields["roles"].append("visible")
-    if "visible" in layer and not layer["visible"]:
-        extra_fields["roles"].append("invisible")
-
-    match layer["protocol"]:
-        case "wms":
-            # handle wms special config options
-            extra_fields["wms:layers"] = layer["layers"]
-            if "styles" in layer:
-                extra_fields["wms:styles"] = layer["styles"]
-            # TODO: handle wms dimensions extra_fields["wms:dimensions"]
-        case "wmts":
-            extra_fields["wmts:layer"] = layer["layer"]
-            # TODO: handle wmts dimensions extra_fields["wmts:dimensions"]
-
-    wml = Link(
-        rel=layer["protocol"],
-        target=layer["url"],
-        media_type="image/png" if "media_type" not in layer else layer["media_type"],
-        title=layer["name"],
-        extra_fields=extra_fields,
-    )
-    return wml
-
-
-def add_example_info(stac_object, data, endpoint, config):
-    if "Services" in data:
-        for service in data["Services"]:
-            if service["Name"] == "Statistical API":
-                service_type = "byoc" if "Type" not in service else service["Type"]
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target="%s/%s" % (config["assets_endpoint"], service["Script"]),
-                        title="evalscript",
-                        media_type="application/javascript",
-                        extra_fields={
-                            "example:language": "JavaScript",
-                            "dataId": "%s-%s" % (service_type, service["CollectionId"]),
-                        },
-                    )
-                )
-            if service["Name"] == "VEDA Statistics":
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target=service["Endpoint"],
-                        title=service["Name"],
-                        media_type="application/json",
-                        extra_fields={
-                            "example:language": "JSON",
-                        },
-                    )
-                )
-            if service["Name"] == "EOxHub Notebook":
-                # TODO: we need to consider if we can improve information added
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target=service["Url"],
-                        title=(
-                            service["Title"] if "Title" in service else service["Name"]
-                        ),
-                        media_type="application/x-ipynb+json",
-                        extra_fields={
-                            "example:language": "Jupyter Notebook",
-                            "example:container": True,
-                        },
-                    )
-                )
-    elif "Resources" in data:
-        for service in data["Resources"]:
-            if service.get("Name") == "xcube":
-                target_url = "%s/timeseries/%s/%s?aggMethods=median" % (
-                    endpoint["EndPoint"],
-                    endpoint["DatacubeId"],
-                    endpoint["Variable"],
-                )
-                stac_object.add_link(
-                    Link(
-                        rel="example",
-                        target=target_url,
-                        title=service["Name"] + " analytics",
-                        media_type="application/json",
-                        extra_fields={
-                            "example:language": "JSON",
-                            "example:method": "POST",
-                        },
-                    )
-                )
-
-
-def generate_veda_cog_link(endpoint, file_url):
-    bidx = ""
-    if "Bidx" in endpoint:
-        # Check if an array was provided
-        if hasattr(endpoint["Bidx"], "__len__"):
-            for band in endpoint["Bidx"]:
-                bidx = bidx + "&bidx=%s" % (band)
-        else:
-            bidx = "&bidx=%s" % (endpoint["Bidx"])
-
-    colormap = ""
-    if "Colormap" in endpoint:
-        colormap = "&colormap=%s" % (endpoint["Colormap"])
-        # TODO: For now we assume a already urlparsed colormap definition
-        # it could be nice to allow a json and better convert it on the fly
-        # colormap = "&colormap=%s"%(urllib.parse.quote(str(endpoint["Colormap"])))
-
-    colormap_name = ""
-    if "ColormapName" in endpoint:
-        colormap_name = "&colormap_name=%s" % (endpoint["ColormapName"])
-
-    rescale = ""
-    if "Rescale" in endpoint:
-        rescale = "&rescale=%s,%s" % (endpoint["Rescale"][0], endpoint["Rescale"][1])
-
-    if file_url:
-        file_url = "url=%s&" % (file_url)
-    else:
-        file_url = ""
-
-    target_url = (
-        "https://staging-raster.delta-backend.com/cog/tiles/WebMercatorQuad/{z}/{x}/{y}?%sresampling_method=nearest%s%s%s%s"
-        % (
-            file_url,
-            bidx,
-            colormap,
-            colormap_name,
-            rescale,
-        )
-    )
-    return target_url
-
-
-def generate_veda_tiles_link(endpoint, item):
-    collection = "collection=%s" % endpoint["CollectionId"]
-    assets = ""
-    for asset in endpoint["Assets"]:
-        assets += "&assets=%s" % asset
-    color_formula = ""
-    if "ColorFormula" in endpoint:
-        color_formula = "&color_formula=%s" % endpoint["ColorFormula"]
-    no_data = ""
-    if "NoData" in endpoint:
-        no_data = "&no_data=%s" % endpoint["NoData"]
-    if item:
-        item = "&item=%s" % (item)
-    else:
-        item = ""
-    target_url = (
-        "https://staging-raster.delta-backend.com/stac/tiles/WebMercatorQuad/{z}/{x}/{y}?%s%s%s%s%s"
-        % (
-            collection,
-            item,
-            assets,
-            color_formula,
-            no_data,
-        )
-    )
-    return target_url
-
-
-def add_visualization_info(stac_object, data, endpoint, file_url=None, time=None):
-    # add extension reference
-    if endpoint["Name"] == "Sentinel Hub" or endpoint["Name"] == "Sentinel Hub WMS":
-        instanceId = os.getenv("SH_INSTANCE_ID")
-        if "InstanceId" in endpoint:
-            instanceId = endpoint["InstanceId"]
-        extra_fields = {
-            "wms:layers": [endpoint["LayerId"]],
-            "role": ["data"],
-        }
-        if time != None:
-            if endpoint["Name"] == "Sentinel Hub WMS":
-                # SH WMS for public collections needs time interval, we use full day here
-                datetime_object = datetime.strptime(time, "%Y-%m-%d")
-                extra_fields["wms:dimensions"] = {
-                    "TIME": "%s/%s"
-                    % (
-                        datetime_object.isoformat(),
-                        (
-                            datetime_object
-                            + timedelta(days=1)
-                            - timedelta(milliseconds=1)
-                        ).isoformat(),
-                    )
-                }
-            if endpoint["Name"] == "Sentinel Hub":
-                extra_fields["wms:dimensions"] = {"TIME": time}
-        stac_object.add_link(
-            Link(
-                rel="wms",
-                target="https://services.sentinel-hub.com/ogc/wms/%s" % (instanceId),
-                media_type=(
-                    endpoint["MimeType"] if "MimeType" in endpoint else "image/png"
-                ),
-                title=data["Name"],
-                extra_fields=extra_fields,
-            )
-        )
-    # elif resource["Name"] == "GeoDB":
-    #     pass
-    elif endpoint["Name"] == "WMS":
-        extra_fields = {
-            "wms:layers": [endpoint["LayerId"]],
-            "role": ["data"],
-        }
-        if time != None:
-            extra_fields["wms:dimensions"] = {
-                "TIME": time,
-            }
-        if "Styles" in endpoint:
-            extra_fields["wms:styles"] = endpoint["Styles"]
-        media_type = "image/jpeg"
-        if "MediaType" in endpoint:
-            media_type = endpoint["MediaType"]
-        stac_object.add_link(
-            Link(
-                rel="wms",
-                target=endpoint["EndPoint"],
-                media_type=media_type,
-                title=data["Name"],
-                extra_fields=extra_fields,
-            )
-        )
-    elif endpoint["Name"] == "JAXA_WMTS_PALSAR":
-        target_url = "%s" % (endpoint.get("EndPoint"),)
-        # custom time just for this special case as a default for collection wmts
-        extra_fields = {
-            "wmts:layer": endpoint.get("LayerId").replace("{time}", time or "2017")
-        }
-        stac_object.add_link(
-            Link(
-                rel="wmts",
-                target=target_url,
-                media_type="image/png",
-                title="wmts capabilities",
-                extra_fields=extra_fields,
-            )
-        )
-    elif endpoint["Name"] == "xcube":
-        if endpoint["Type"] == "zarr":
-            # either preset ColormapName of left as a template
-            cbar = endpoint.get("ColormapName", "{cbar}")
-            # either preset Rescale of left as a template
-            vmin = "{vmin}"
-            vmax = "{vmax}"
-            if "Rescale" in endpoint:
-                vmin = endpoint["Rescale"][0]
-                vmax = endpoint["Rescale"][1]
-            crs = endpoint.get("Crs", "EPSG:3857")
-            target_url = (
-                "%s/tiles/%s/%s/{z}/{y}/{x}?crs=%s&time={time}&vmin=%s&vmax=%s&cbar=%s"
-                % (
-                    endpoint["EndPoint"],
-                    endpoint["DatacubeId"],
-                    endpoint["Variable"],
-                    crs,
-                    vmin,
-                    vmax,
-                    cbar,
-                )
-            )
-            stac_object.add_link(
-                Link(
-                    rel="xyz",
-                    target=target_url,
-                    media_type="image/png",
-                    title="xcube tiles",
-                )
-            )
-        elif endpoint["Type"] == "WMTSCapabilities":
-            target_url = "%s" % (endpoint.get("EndPoint"),)
-            extra_fields = {
-                "wmts:layer": endpoint.get("LayerId"),
-                "role": ["data"],
-            }
-            dimensions = {}
-            if time != None:
-                dimensions["time"] = time
-            if dimensions_config := endpoint.get("Dimensions", {}):
-                for key, value in dimensions_config.items():
-                    dimensions[key] = value
-            if dimensions != {}:
-                extra_fields["wmts:dimensions"] = dimensions
-            stac_object.add_link(
-                Link(
-                    rel="wmts",
-                    target=target_url,
-                    media_type="image/png",
-                    title="wmts capabilities",
-                    extra_fields=extra_fields,
-                )
-            )
-    elif endpoint["Name"] == "VEDA":
-        if endpoint["Type"] == "cog":
-            target_url = generate_veda_cog_link(endpoint, file_url)
-        elif endpoint["Type"] == "tiles":
-            target_url = generate_veda_tiles_link(endpoint, file_url)
-        if target_url:
-            stac_object.add_link(
-                Link(
-                    rel="xyz",
-                    target=target_url,
-                    media_type="image/png",
-                    title=data["Name"],
-                )
-            )
-    elif endpoint["Name"] == "GeoDB Vector Tiles":
-        # `${geoserverUrl}${config.layerName}@EPSG%3A${projString}@pbf/{z}/{x}/{-y}.pbf`,
-        # 'geodb_debd884d-92f9-4979-87b6-eadef1139394:GTIF_AT_Gemeinden_3857'
-        target_url = "%s%s:%s_%s@EPSG:3857@pbf/{z}/{x}/{-y}.pbf" % (
-            endpoint["EndPoint"],
-            endpoint["Instance"],
-            endpoint["Database"],
-            endpoint["CollectionId"],
-        )
-        stac_object.add_link(
-            Link(
-                rel="xyz",
-                target=target_url,
-                media_type="application/pbf",
-                title=data["Name"],
-                extra_fields={
-                    "description": data["Title"],
-                    "parameters": endpoint["Parameters"],
-                    "matchKey": endpoint["MatchKey"],
-                    "timeKey": endpoint["TimeKey"],
-                    "source": endpoint["Source"],
-                    "role": ["data"],
-                },
-            )
-        )
-    else:
-        print("Visualization endpoint not supported")
-
-
-def process_STACAPI_Endpoint(
-    config,
-    endpoint,
-    data,
-    catalog,
-    options,
-    headers={},
-    bbox=None,
-    root_collection=None,
-    filter_dates=None,
-):
-    collection, _ = get_or_create_collection(
-        catalog, endpoint["CollectionId"], data, config, endpoint
-    )
-    # add_visualization_info(collection, data, endpoint)
-
-    api = Client.open(endpoint["EndPoint"], headers=headers)
-    if bbox == None:
-        bbox = "-180,-90,180,90"
-    results = api.search(
-        collections=[endpoint["CollectionId"]],
-        bbox=bbox,
-        datetime=["1900-01-01T00:00:00Z", "3000-01-01T00:00:00Z"],
-    )
-    # We keep track of potential duplicate times in this list
-    added_times = {}
-    for item in results.items():
-        item_datetime = item.get_datetime()
-        if item_datetime != None:
-            iso_date = item_datetime.isoformat()[:10]
-            # if filterdates has been specified skip dates not listed in config
-            if filter_dates and iso_date not in filter_dates:
-                continue
-            if iso_date in added_times:
-                continue
-            added_times[iso_date] = True
-        link = collection.add_item(item)
-        if options.tn:
-            if "cog_default" in item.assets:
-                generate_thumbnail(
-                    item, data, endpoint, item.assets["cog_default"].href
-                )
-            else:
-                generate_thumbnail(item, data, endpoint)
-        # Check if we can create visualization link
-        if "Assets" in endpoint:
-            add_visualization_info(item, data, endpoint, item.id)
-            link.extra_fields["item"] = item.id
-        elif "cog_default" in item.assets:
-            add_visualization_info(
-                item, data, endpoint, item.assets["cog_default"].href
-            )
-            link.extra_fields["cog_href"] = item.assets["cog_default"].href
-        elif item_datetime:
-            time_string = item_datetime.isoformat()[:-6] + "Z"
-            add_visualization_info(item, data, endpoint, time=time_string)
-        elif "start_datetime" in item.properties and "end_datetime" in item.properties:
-            add_visualization_info(
-                item,
-                data,
-                endpoint,
-                time="%s/%s"
-                % (item.properties["start_datetime"], item.properties["end_datetime"]),
-            )
-        # If a root collection exists we point back to it from the item
-        if root_collection != None:
-            item.set_collection(root_collection)
-
-        # bubble up information we want to the link
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            iso_time = item_datetime.isoformat()[:-6] + "Z"
-            if endpoint["Name"] == "Sentinel Hub":
-                # for SH WMS we only save the date (no time)
-                link.extra_fields["datetime"] = iso_date
-            else:
-                link.extra_fields["datetime"] = iso_time
-        else:
-            link.extra_fields["start_datetime"] = item.properties["start_datetime"]
-            link.extra_fields["end_datetime"] = item.properties["end_datetime"]
-
-    collection.update_extent_from_items()
-
-    # replace SH identifier with catalog identifier
-    collection.id = data["Name"]
-    add_collection_information(config, collection, data)
-
-    # Check if we need to overwrite the bbox after update from items
-    if "OverwriteBBox" in endpoint:
-        collection.extent.spatial = SpatialExtent(
-            [
-                endpoint["OverwriteBBox"],
-            ]
-        )
-
-    return collection
-
-
-def fetch_and_save_thumbnail(data, url):
-    collection_path = "../thumbnails/%s_%s/" % (data["EodashIdentifier"], data["Name"])
-    Path(collection_path).mkdir(parents=True, exist_ok=True)
-    image_path = "%s/thumbnail.png" % (collection_path)
-    if not os.path.exists(image_path):
-        data = requests.get(url).content
-        f = open(image_path, "wb")
-        f.write(data)
-        f.close()
-
-
-def generate_thumbnail(
-    stac_object, data, endpoint, file_url=None, time=None, styles=None
-):
-    if endpoint["Name"] == "Sentinel Hub" or endpoint["Name"] == "WMS":
-        instanceId = os.getenv("SH_INSTANCE_ID")
-        if "InstanceId" in endpoint:
-            instanceId = endpoint["InstanceId"]
-        # Build example url
-        wms_config = "REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image/png&STYLES=&TRANSPARENT=true"
-        bbox = "%s,%s,%s,%s" % (
-            stac_object.bbox[1],
-            stac_object.bbox[0],
-            stac_object.bbox[3],
-            stac_object.bbox[2],
-        )
-        output_format = (
-            "format=image/png&WIDTH=256&HEIGHT=128&CRS=EPSG:4326&BBOX=%s" % (bbox)
-        )
-        item_datetime = stac_object.get_datetime()
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            time = item_datetime.isoformat()[:-6] + "Z"
-        url = "https://services.sentinel-hub.com/ogc/wms/%s?%s&layers=%s&time=%s&%s" % (
-            instanceId,
-            wms_config,
-            endpoint["LayerId"],
-            time,
-            output_format,
-        )
-        fetch_and_save_thumbnail(data, url)
-    elif endpoint["Name"] == "VEDA":
-        target_url = generate_veda_cog_link(endpoint, file_url)
-        # set to get 0/0/0 tile
-        url = re.sub(r"\{.\}", "0", target_url)
-        fetch_and_save_thumbnail(data, url)
-
-
-def process_STAC_Datacube_Endpoint(config, endpoint, data, catalog):
-    collection, _ = get_or_create_collection(
-        catalog, data["Name"], data, config, endpoint
-    )
-    add_visualization_info(collection, data, endpoint)
-
-    stac_endpoint_url = endpoint["EndPoint"]
-    if endpoint.get("Name") == "xcube":
-        stac_endpoint_url = stac_endpoint_url + endpoint.get("StacEndpoint", "")
-    # assuming /search not implemented
-    api = Client.open(stac_endpoint_url)
-    coll = api.get_collection(endpoint.get("CollectionId", "datacubes"))
-    item = coll.get_item(endpoint.get("DatacubeId"))
-    # slice a datacube along temporal axis to individual items, selectively adding properties
-    dimensions = item.properties.get("cube:dimensions", {})
-    variables = item.properties.get("cube:variables")
-    if not endpoint.get("Variable") in variables.keys():
-        raise Exception(
-            f'Variable {endpoint.get("Variable")} not found in datacube {variables}'
-        )
-    time_dimension = "time"
-    for k, v in dimensions.items():
-        if v.get("type") == "temporal":
-            time_dimension = k
-            break
-    time_entries = dimensions.get(time_dimension).get("values")
-    for t in time_entries:
-        item = Item(
-            id=t,
-            bbox=item.bbox,
-            properties={},
-            geometry=item.geometry,
-            datetime=parser.isoparse(t),
-        )
-        link = collection.add_item(item)
-        link.extra_fields["datetime"] = t
-        # bubble up information we want to the link
-        item_datetime = item.get_datetime()
-        # it is possible for datetime to be null, if it is start and end datetime have to exist
-        if item_datetime:
-            link.extra_fields["datetime"] = item_datetime.isoformat()[:-6] + "Z"
-        else:
-            link.extra_fields["start_datetime"] = item.properties["start_datetime"]
-            link.extra_fields["end_datetime"] = item.properties["end_datetime"]
-    unit = variables.get(endpoint.get("Variable")).get("unit")
-    if unit and "yAxis" not in data:
-        data["yAxis"] = unit
-    collection.update_extent_from_items()
-
-    add_collection_information(config, collection, data)
-
-    return collection
-
-
-def add_collection_information(config, collection, data):
-    # Add metadata information
-    # Check license identifier
-    if "License" in data:
-        # Check if list was provided
-        if isinstance(data["License"], list):
-            if len(data["License"]) == 1:
-                collection.license = "proprietary"
-                link = Link(
-                    rel="license",
-                    target=data["License"][0]["Url"],
-                    media_type=(
-                        data["License"][0]["Type"]
-                        if "Type" in data["License"][0]
-                        else "text/html"
-                    ),
-                )
-                if "Title" in data["License"][0]:
-                    link.title = data["License"][0]["Title"]
-                collection.links.append(link)
-            elif len(data["License"]) > 1:
-                collection.license = "various"
-                for l in data["License"]:
-                    link = Link(
-                        rel="license",
-                        target=l["Url"],
-                        media_type="text/html" if "Type" in l else l["Type"],
-                    )
-                    if "Title" in l:
-                        link.title = l["Title"]
-                    collection.links.append(link)
-        else:
-            license = lookup.by_id(data["License"])
-            if license is not None:
-                collection.license = license.id
-                if license.sources:
-                    # add links to licenses
-                    for source in license.sources:
-                        collection.links.append(
-                            Link(
-                                rel="license",
-                                target=source,
-                                media_type="text/html",
-                            )
-                        )
-            else:
-                # fallback to proprietary
-                print(
-                    "WARNING: License could not be parsed, falling back to proprietary"
-                )
-                collection.license = "proprietary"
-    else:
-        # print("WARNING: No license was provided, falling back to proprietary")
-        pass
-
-    if "Provider" in data:
-        try:
-            collection.providers = [
-                Provider(
-                    # convert information to lower case
-                    **dict((k.lower(), v) for k, v in provider.items())
-                )
-                for provider in data["Provider"]
-            ]
-        except:
-            print(
-                "WARNING: Issue creating provider information for collection: %s"
-                % collection.id
-            )
-
-    if "Citation" in data:
-        if "DOI" in data["Citation"]:
-            collection.extra_fields["sci:doi"] = data["Citation"]["DOI"]
-        if "Citation" in data["Citation"]:
-            collection.extra_fields["sci:citation"] = data["Citation"]["Citation"]
-        if "Publication" in data["Citation"]:
-            collection.extra_fields["sci:publications"] = [
-                # convert keys to lower case
-                dict((k.lower(), v) for k, v in publication.items())
-                for publication in data["Citation"]["Publication"]
-            ]
-
-    if "Subtitle" in data:
-        collection.extra_fields["subtitle"] = data["Subtitle"]
-    if "Legend" in data:
-        collection.add_asset(
-            "legend",
-            Asset(
-                href="%s/%s" % (config["assets_endpoint"], data["Legend"]),
-                media_type="image/png",
-                roles=["metadata"],
-            ),
-        )
-    if "Story" in data:
-        collection.add_asset(
-            "story",
-            Asset(
-                href="%s/%s" % (config["assets_endpoint"], data["Story"]),
-                media_type="text/markdown",
-                roles=["metadata"],
-            ),
-        )
-    if "Image" in data:
-        collection.add_asset(
-            "thumbnail",
-            Asset(
-                href="%s/%s" % (config["assets_endpoint"], data["Image"]),
-                media_type="image/png",
-                roles=["thumbnail"],
-            ),
-        )
-    # Add extra fields to collection if available
-    add_extra_fields(collection, data)
-
-    if "References" in data:
-        generic_counter = 1
-        for ref in data["References"]:
-            if "Key" in ref:
-                key = ref["Key"]
-            else:
-                key = "reference_%s" % generic_counter
-                generic_counter = generic_counter + 1
-            collection.add_asset(
-                key,
-                Asset(
-                    href=ref["Url"],
-                    title=ref["Name"],
-                    media_type=ref["MediaType"] if "MediaType" in ref else "text/html",
-                    roles=["metadata"],
-                ),
-            )
-
-
-@dataclass
-class Options:
-    catalogspath: str
-    collectionspath: str
-    indicatorspath: str
-    outputpath: str
-    vd: bool
-    ni: bool
-    tn: bool
-    collections: List[str]
-
-
 @click.command()
 @click.option(
     "--catalog",
@@ -1541,9 +387,7 @@ class Options:
     is_flag=True,
     help="validation flag, if set, validation will be run on generated catalogs",
 )
-@click.option(
-    "-ni", is_flag=True, help="no items flag, if set, items will not be saved"
-)
+@click.option("-ni", is_flag=True, help="no items flag, if set, items will not be saved")
 @click.option(
     "-tn",
     is_flag=True,
@@ -1579,14 +423,11 @@ def process_catalogs(
     )
     tasks = []
     for file_name in os.listdir(catalogspath):
-        file_path = os.path.join(catalogspath, file_name)
-        if os.path.isfile(file_path):
-            if catalog == None or os.path.splitext(file_name)[0] == catalog:
-                tasks.append(
-                    RaisingThread(
-                        target=process_catalog_file, args=(file_path, options)
-                    )
-                )
-                tasks[-1].start()
+        file_path = f"{catalogspath}/{file_name}"
+        if os.path.isfile(file_path) and (
+            catalog is None or os.path.splitext(file_name)[0] == catalog
+        ):
+            tasks.append(RaisingThread(target=process_catalog_file, args=(file_path, options)))
+            tasks[-1].start()
     for task in tasks:
         task.join()
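
The thread fan-out at the end is unchanged in spirit: one RaisingThread per catalog file, joined at the end so a failure in any worker surfaces in the main thread. The class itself is imported from eodash_catalog.utils and its implementation is not part of this diff; the usual pattern for such a helper, shown here purely as an assumption of what it likely does, is to capture the worker exception and re-raise it in join():

    import threading

    class RaisingThread(threading.Thread):
        """Thread that re-raises an exception from its target in join() (assumed behavior)."""

        def run(self):
            self._exc = None
            try:
                super().run()
            except Exception as exc:
                # remember the failure so the joining thread can surface it
                self._exc = exc

        def join(self, timeout=None):
            super().join(timeout=timeout)
            if self._exc is not None:
                raise self._exc
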