eodash_catalog 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eodash_catalog might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2024-present Daniel Santillan <daniel.santillan@eox.at>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.1.8"
4
+ __version__ = "0.1.10"
@@ -17,6 +17,7 @@ from eodash_catalog.sh_endpoint import get_SH_token
17
17
  from eodash_catalog.stac_handling import (
18
18
  add_collection_information,
19
19
  add_example_info,
20
+ add_process_info_child_collection,
20
21
  add_projection_info,
21
22
  get_collection_datetimes_from_config,
22
23
  get_or_create_collection,
@@ -174,7 +175,7 @@ def handle_STAC_based_endpoint(
174
175
  collection.description = location["Name"]
175
176
  # TODO: should we remove all assets from sub collections?
176
177
  link = root_collection.add_child(collection)
177
- latlng = f'{location["Point"][1]},{location["Point"][0]}'
178
+ latlng = f'{location["Point"][1]},{location["Point"][0]}'.strip()
178
179
  # Add extra properties we need
179
180
  link.extra_fields["id"] = location["Identifier"]
180
181
  link.extra_fields["latlng"] = latlng
@@ -182,6 +183,7 @@ def handle_STAC_based_endpoint(
182
183
  add_example_info(collection, collection_config, endpoint_config, catalog_config)
183
184
  # eodash v4 compatibility
184
185
  add_visualization_info(collection, collection_config, endpoint_config)
186
+ add_process_info_child_collection(collection, catalog_config, collection_config)
185
187
  if "OverwriteBBox" in location:
186
188
  collection.extent.spatial = SpatialExtent(
187
189
  [
@@ -208,7 +210,7 @@ def handle_STAC_based_endpoint(
208
210
  )
209
211
  # eodash v4 compatibility
210
212
  add_visualization_info(root_collection, collection_config, endpoint_config)
211
- add_collection_information(catalog_config, root_collection, collection_config)
213
+ add_collection_information(catalog_config, root_collection, collection_config, True)
212
214
  add_example_info(root_collection, collection_config, endpoint_config, catalog_config)
213
215
  return root_collection
214
216
 
@@ -406,7 +408,7 @@ def handle_SH_WMS_endpoint(
406
408
 
407
409
  link = root_collection.add_child(collection)
408
410
  # bubble up information we want to the link
409
- latlng = "{},{}".format(location["Point"][1], location["Point"][0])
411
+ latlng = "{},{}".format(location["Point"][1], location["Point"][0]).strip()
410
412
  link.extra_fields["id"] = location["Identifier"]
411
413
  link.extra_fields["latlng"] = latlng
412
414
  link.extra_fields["country"] = location["Country"]
@@ -416,6 +418,7 @@ def handle_SH_WMS_endpoint(
416
418
  else:
417
419
  LOGGER.warn(f"NO datetimes configured for collection: {collection_config['Name']}!")
418
420
  add_visualization_info(collection, collection_config, endpoint_config)
421
+ add_process_info_child_collection(collection, catalog_config, collection_config)
419
422
 
420
423
  root_collection.update_extent_from_items()
421
424
  # Add bbox extents from children
@@ -443,7 +446,7 @@ def handle_SH_WMS_endpoint(
443
446
  item_link = root_collection.add_item(item)
444
447
  item_link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
445
448
  # eodash v4 compatibility
446
- add_collection_information(catalog_config, root_collection, collection_config)
449
+ add_collection_information(catalog_config, root_collection, collection_config, True)
447
450
  add_visualization_info(root_collection, collection_config, endpoint_config)
448
451
  return root_collection
449
452
 
@@ -528,7 +531,7 @@ def handle_GeoDB_endpoint(
528
531
  id=IdValue,
529
532
  bbox=bbox,
530
533
  properties={},
531
- geometry=create_geojson_point(lon, lat),
534
+ geometry=create_geojson_point(lon, lat)["geometry"],
532
535
  datetime=None,
533
536
  start_datetime=min_date,
534
537
  end_datetime=max_date,
@@ -990,7 +993,7 @@ def handle_raw_source(
990
993
  id=format_datetime_to_isostring_zulu(dt),
991
994
  bbox=bbox,
992
995
  properties={},
993
- geometry=create_geojson_from_bbox(bbox),
996
+ geometry=create_geojson_from_bbox(bbox)["features"][0]["geometry"],
994
997
  datetime=dt,
995
998
  assets=assets,
996
999
  extra_fields={},
@@ -9,13 +9,11 @@ import time
9
9
  from typing import Any
10
10
 
11
11
  import click
12
- import yaml
13
12
  from dotenv import load_dotenv
14
13
  from pystac import Catalog, CatalogType, Collection, Link, Summaries
15
14
  from pystac.layout import TemplateLayoutStrategy
16
15
  from pystac.validation import validate_all
17
16
  from structlog import get_logger
18
- from yaml.loader import SafeLoader
19
17
 
20
18
  from eodash_catalog.endpoints import (
21
19
  handle_collection_only,
@@ -42,6 +40,7 @@ from eodash_catalog.utils import (
42
40
  RaisingThread,
43
41
  add_single_item_if_collection_empty,
44
42
  iter_len_at_least,
43
+ read_config_file,
45
44
  recursive_save,
46
45
  retry,
47
46
  )
@@ -53,76 +52,72 @@ LOGGER = get_logger(__name__)
53
52
 
54
53
  def process_catalog_file(file_path: str, options: Options):
55
54
  LOGGER.info(f"Processing catalog: {file_path}")
56
- with open(file_path) as f:
57
- catalog_config: dict = yaml.load(f, Loader=SafeLoader)
58
-
59
- if len(options.collections) > 0:
60
- # create only catalogs containing the passed collections
61
- process_collections = [
62
- c for c in catalog_config["collections"] if c in options.collections
63
- ]
64
- elif (len(options.collections) == 1 and options.collections == "all") or len(
65
- options.collections
66
- ) == 0:
67
- # create full catalog
68
- process_collections = catalog_config["collections"]
69
- if len(process_collections) == 0:
70
- LOGGER.info("No applicable collections found for catalog, skipping creation")
71
- return
72
- catalog = Catalog(
73
- id=catalog_config["id"],
74
- description=catalog_config["description"],
75
- title=catalog_config["title"],
76
- catalog_type=CatalogType.RELATIVE_PUBLISHED,
77
- )
78
- for collection in process_collections:
79
- file_path = f"{options.collectionspath}/{collection}.yaml"
80
- if os.path.isfile(file_path):
81
- # if collection file exists process it as indicator
82
- # collection will be added as single collection to indicator
83
- process_indicator_file(catalog_config, file_path, catalog, options)
84
- else:
85
- # if not try to see if indicator definition available
86
- file_path = f"{options.indicatorspath}/{collection}.yaml"
87
- if os.path.isfile(file_path):
88
- process_indicator_file(
89
- catalog_config,
90
- f"{options.indicatorspath}/{collection}.yaml",
91
- catalog,
92
- options,
93
- )
94
- else:
95
- LOGGER.info(f"Warning: neither collection nor indicator found for {collection}")
96
- if "MapProjection" in catalog_config:
97
- catalog.extra_fields["eodash:mapProjection"] = catalog_config["MapProjection"]
55
+ catalog_config: dict = read_config_file(file_path)
56
+ if len(options.collections) > 0:
57
+ # create only catalogs containing the passed collections
58
+ process_collections = [c for c in catalog_config["collections"] if c in options.collections]
59
+ elif (len(options.collections) == 1 and options.collections == "all") or len(
60
+ options.collections
61
+ ) == 0:
62
+ # create full catalog
63
+ process_collections = catalog_config["collections"]
64
+ if len(process_collections) == 0:
65
+ LOGGER.info("No applicable collections found for catalog, skipping creation")
66
+ return
67
+ catalog = Catalog(
68
+ id=catalog_config["id"],
69
+ description=catalog_config["description"],
70
+ title=catalog_config["title"],
71
+ catalog_type=CatalogType.RELATIVE_PUBLISHED,
72
+ )
73
+ for collection in process_collections:
74
+ file_path = f"{options.collectionspath}/{collection}"
75
+ try:
76
+ # if collection file exists process it as indicator
77
+ # collection will be added as single collection to indicator
78
+ process_indicator_file(catalog_config, file_path, catalog, options)
79
+ except FileNotFoundError:
80
+ # if not exists try to see if indicator definition available
81
+ file_path_indicator = f"{options.indicatorspath}/{collection}"
82
+ try:
83
+ process_indicator_file(
84
+ catalog_config,
85
+ file_path_indicator,
86
+ catalog,
87
+ options,
88
+ )
89
+ except FileNotFoundError:
90
+ LOGGER.info(f"Warning: neither collection nor indicator found for {collection}")
91
+ if "MapProjection" in catalog_config:
92
+ catalog.extra_fields["eodash:mapProjection"] = catalog_config["MapProjection"]
98
93
 
99
- strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
100
- # expecting that the catalog will be hosted online, self url should correspond to that
101
- # default to a local folder + catalog id in case not set
94
+ strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
95
+ # expecting that the catalog will be hosted online, self url should correspond to that
96
+ # default to a local folder + catalog id in case not set
102
97
 
103
- LOGGER.info("Started creation of collection files")
104
- start = time.time()
105
- if options.ni:
106
- catalog_self_href = f'{options.outputpath}/{catalog_config["id"]}'
107
- catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
108
- recursive_save(catalog, options.ni)
109
- else:
110
- # For full catalog save with items this still seems to be faster
111
- catalog_self_href = catalog_config.get(
112
- "endpoint", "{}/{}".format(options.outputpath, catalog_config["id"])
113
- )
114
- catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
115
- catalog.save(dest_href="{}/{}".format(options.outputpath, catalog_config["id"]))
116
- end = time.time()
117
- LOGGER.info(f"Catalog {catalog_config['id']}: Time consumed in saving: {end - start}")
98
+ LOGGER.info("Started creation of collection files")
99
+ start = time.time()
100
+ if options.ni or options.gp:
101
+ catalog_self_href = f'{options.outputpath}/{catalog_config["id"]}'
102
+ catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
103
+ recursive_save(catalog, options.ni, options.gp)
104
+ else:
105
+ # For full catalog save with items this still seems to be faster
106
+ catalog_self_href = catalog_config.get(
107
+ "endpoint", "{}/{}".format(options.outputpath, catalog_config["id"])
108
+ )
109
+ catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
110
+ catalog.save(dest_href="{}/{}".format(options.outputpath, catalog_config["id"]))
111
+ end = time.time()
112
+ LOGGER.info(f"Catalog {catalog_config['id']}: Time consumed in saving: {end - start}")
118
113
 
119
- if options.vd:
120
- # try to validate catalog if flag was set
121
- LOGGER.info(f"Running validation of catalog {file_path}")
122
- try:
123
- validate_all(catalog.to_dict(), href=catalog_config["endpoint"])
124
- except Exception as e:
125
- LOGGER.info(f"Issue validation collection: {e}")
114
+ if options.vd:
115
+ # try to validate catalog if flag was set
116
+ LOGGER.info(f"Running validation of catalog {file_path}")
117
+ try:
118
+ validate_all(catalog.to_dict(), href=catalog_config["endpoint"])
119
+ except Exception as e:
120
+ LOGGER.info(f"Issue validation collection: {e}")
126
121
 
127
122
 
128
123
  def extract_indicator_info(parent_collection: Collection):
@@ -166,37 +161,36 @@ def extract_indicator_info(parent_collection: Collection):
166
161
  def process_indicator_file(
167
162
  catalog_config: dict, file_path: str, catalog: Catalog, options: Options
168
163
  ):
169
- with open(file_path) as f:
170
- LOGGER.info(f"Processing indicator: {file_path}")
171
- indicator_config: dict = yaml.load(f, Loader=SafeLoader)
172
- parent_indicator = get_or_create_collection(
173
- catalog, indicator_config["Name"], indicator_config, catalog_config, {}
174
- )
175
- if "Collections" in indicator_config:
176
- for collection in indicator_config["Collections"]:
177
- process_collection_file(
178
- catalog_config,
179
- f"{options.collectionspath}/{collection}.yaml",
180
- parent_indicator,
181
- options,
182
- "Disable" in indicator_config and collection in indicator_config["Disable"],
183
- )
184
- else:
185
- # we assume that collection files can also be loaded directly
186
- process_collection_file(catalog_config, file_path, parent_indicator, options)
187
- add_collection_information(catalog_config, parent_indicator, indicator_config)
188
- if iter_len_at_least(parent_indicator.get_items(recursive=True), 1):
189
- parent_indicator.update_extent_from_items()
190
- # Add bbox extents from children
191
- for c_child in parent_indicator.get_children():
192
- if isinstance(c_child, Collection): # typing reason
193
- parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
194
- # extract collection information and add it to summary indicator level
195
- extract_indicator_info(parent_indicator)
196
- add_process_info(parent_indicator, catalog_config, indicator_config)
197
- # add baselayer and overview information to indicator collection
198
- add_base_overlay_info(parent_indicator, catalog_config, indicator_config)
199
- add_to_catalog(parent_indicator, catalog, {}, indicator_config)
164
+ LOGGER.info(f"Processing indicator: {file_path}")
165
+ indicator_config = read_config_file(file_path)
166
+ parent_indicator = get_or_create_collection(
167
+ catalog, indicator_config["Name"], indicator_config, catalog_config, {}
168
+ )
169
+ if "Collections" in indicator_config:
170
+ for collection in indicator_config["Collections"]:
171
+ process_collection_file(
172
+ catalog_config,
173
+ f"{options.collectionspath}/{collection}",
174
+ parent_indicator,
175
+ options,
176
+ "Disable" in indicator_config and collection in indicator_config["Disable"],
177
+ )
178
+ else:
179
+ # we assume that collection files can also be loaded directly
180
+ process_collection_file(catalog_config, file_path, parent_indicator, options)
181
+ add_collection_information(catalog_config, parent_indicator, indicator_config, True)
182
+ if iter_len_at_least(parent_indicator.get_items(recursive=True), 1):
183
+ parent_indicator.update_extent_from_items()
184
+ # Add bbox extents from children
185
+ for c_child in parent_indicator.get_children():
186
+ if isinstance(c_child, Collection): # typing reason
187
+ parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
188
+ # extract collection information and add it to summary indicator level
189
+ extract_indicator_info(parent_indicator)
190
+ add_process_info(parent_indicator, catalog_config, indicator_config)
191
+ # add baselayer and overview information to indicator collection
192
+ add_base_overlay_info(parent_indicator, catalog_config, indicator_config)
193
+ add_to_catalog(parent_indicator, catalog, {}, indicator_config)
200
194
 
201
195
 
202
196
  @retry((Exception), tries=3, delay=5, backoff=2, logger=LOGGER)
@@ -208,164 +202,159 @@ def process_collection_file(
208
202
  disable=False,
209
203
  ):
210
204
  LOGGER.info(f"Processing collection: {file_path}")
211
- with open(file_path) as f:
212
- collection_config: dict = yaml.load(f, Loader=SafeLoader)
213
- if "Resources" in collection_config:
214
- for endpoint_config in collection_config["Resources"]:
215
- try:
216
- collection = None
217
- if endpoint_config["Name"] == "Sentinel Hub":
218
- collection = handle_SH_endpoint(
219
- catalog_config, endpoint_config, collection_config, catalog, options
220
- )
221
- elif endpoint_config["Name"] == "Sentinel Hub WMS":
222
- collection = handle_SH_WMS_endpoint(
223
- catalog_config, endpoint_config, collection_config, catalog
224
- )
225
- elif endpoint_config["Name"] == "GeoDB":
226
- collection = handle_GeoDB_endpoint(
227
- catalog_config, endpoint_config, collection_config, catalog
228
- )
229
- elif endpoint_config["Name"] == "VEDA":
230
- collection = handle_VEDA_endpoint(
231
- catalog_config, endpoint_config, collection_config, catalog, options
232
- )
233
- elif endpoint_config["Name"] == "marinedatastore":
234
- collection = handle_WMS_endpoint(
235
- catalog_config, endpoint_config, collection_config, catalog, wmts=True
236
- )
237
- elif endpoint_config["Name"] == "xcube":
238
- collection = handle_xcube_endpoint(
239
- catalog_config, endpoint_config, collection_config, catalog
240
- )
241
- elif endpoint_config["Name"] == "rasdaman":
242
- collection = handle_rasdaman_endpoint(
243
- catalog_config, endpoint_config, collection_config, catalog
244
- )
245
- elif endpoint_config["Name"] == "WMS":
246
- collection = handle_WMS_endpoint(
247
- catalog_config, endpoint_config, collection_config, catalog
248
- )
249
- elif endpoint_config["Name"] == "JAXA_WMTS_PALSAR":
250
- # somewhat one off creation of individual WMTS layers as individual items
251
- collection = handle_WMS_endpoint(
252
- catalog_config, endpoint_config, collection_config, catalog, wmts=True
253
- )
254
- elif endpoint_config["Name"] == "Collection-only":
255
- collection = handle_collection_only(
256
- catalog_config, endpoint_config, collection_config, catalog
257
- )
258
- elif endpoint_config["Name"] == "Custom-Endpoint":
259
- collection = handle_custom_endpoint(
260
- catalog_config,
261
- endpoint_config,
262
- collection_config,
263
- catalog,
264
- )
265
- elif endpoint_config["Name"] in [
266
- "COG source",
267
- "GeoJSON source",
268
- "FlatGeobuf source",
269
- ]:
270
- collection = handle_raw_source(
271
- catalog_config, endpoint_config, collection_config, catalog
272
- )
273
- else:
274
- raise ValueError("Type of Resource is not supported")
275
- if collection:
276
- add_single_item_if_collection_empty(collection)
277
- add_projection_info(endpoint_config, collection)
278
- add_to_catalog(
279
- collection, catalog, endpoint_config, collection_config, disable
280
- )
281
- else:
282
- raise Exception(
283
- f"No collection was generated for resource {endpoint_config}"
284
- )
285
- except Exception as e:
286
- LOGGER.warn(f"""Exception: {e.args[0]} with config: {endpoint_config}""")
287
- raise e
288
-
289
- elif "Subcollections" in collection_config:
290
- # if no endpoint is specified we check for definition of subcollections
291
- parent_collection = get_or_create_collection(
292
- catalog, collection_config["Name"], collection_config, catalog_config, {}
293
- )
294
-
295
- locations = []
296
- countries = []
297
- for sub_coll_def in collection_config["Subcollections"]:
298
- # Subcollection has only data on one location which
299
- # is defined for the entire collection
300
- if "Name" in sub_coll_def and "Point" in sub_coll_def:
301
- locations.append(sub_coll_def["Name"])
302
- if isinstance(sub_coll_def["Country"], list):
303
- countries.extend(sub_coll_def["Country"])
304
- else:
305
- countries.append(sub_coll_def["Country"])
306
- process_collection_file(
307
- catalog_config,
308
- "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
309
- parent_collection,
310
- options,
205
+ collection_config = read_config_file(file_path)
206
+ if "Resources" in collection_config:
207
+ for endpoint_config in collection_config["Resources"]:
208
+ try:
209
+ collection = None
210
+ if endpoint_config["Name"] == "Sentinel Hub":
211
+ collection = handle_SH_endpoint(
212
+ catalog_config, endpoint_config, collection_config, catalog, options
311
213
  )
312
- # find link in parent collection to update metadata
313
- for link in parent_collection.links:
314
- if (
315
- link.rel == "child"
316
- and "id" in link.extra_fields
317
- and link.extra_fields["id"] == sub_coll_def["Identifier"]
318
- ):
319
- latlng = "{},{}".format(
320
- sub_coll_def["Point"][1],
321
- sub_coll_def["Point"][0],
322
- )
323
- link.extra_fields["id"] = sub_coll_def["Identifier"]
324
- link.extra_fields["latlng"] = latlng
325
- link.extra_fields["name"] = sub_coll_def["Name"]
326
- # Update title of collection to use location name
327
- sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
328
- if sub_collection:
329
- sub_collection.title = sub_coll_def["Name"]
330
- # The subcollection has multiple locations which need to be extracted
331
- # and elevated to parent collection level
332
- else:
333
- # create temp catalog to save collection
334
- tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
335
- process_collection_file(
214
+ elif endpoint_config["Name"] == "Sentinel Hub WMS":
215
+ collection = handle_SH_WMS_endpoint(
216
+ catalog_config, endpoint_config, collection_config, catalog
217
+ )
218
+ elif endpoint_config["Name"] == "GeoDB":
219
+ collection = handle_GeoDB_endpoint(
220
+ catalog_config, endpoint_config, collection_config, catalog
221
+ )
222
+ elif endpoint_config["Name"] == "VEDA":
223
+ collection = handle_VEDA_endpoint(
224
+ catalog_config, endpoint_config, collection_config, catalog, options
225
+ )
226
+ elif endpoint_config["Name"] == "marinedatastore":
227
+ collection = handle_WMS_endpoint(
228
+ catalog_config, endpoint_config, collection_config, catalog, wmts=True
229
+ )
230
+ elif endpoint_config["Name"] == "xcube":
231
+ collection = handle_xcube_endpoint(
232
+ catalog_config, endpoint_config, collection_config, catalog
233
+ )
234
+ elif endpoint_config["Name"] == "rasdaman":
235
+ collection = handle_rasdaman_endpoint(
236
+ catalog_config, endpoint_config, collection_config, catalog
237
+ )
238
+ elif endpoint_config["Name"] == "WMS":
239
+ collection = handle_WMS_endpoint(
240
+ catalog_config, endpoint_config, collection_config, catalog
241
+ )
242
+ elif endpoint_config["Name"] == "JAXA_WMTS_PALSAR":
243
+ # somewhat one off creation of individual WMTS layers as individual items
244
+ collection = handle_WMS_endpoint(
245
+ catalog_config, endpoint_config, collection_config, catalog, wmts=True
246
+ )
247
+ elif endpoint_config["Name"] == "Collection-only":
248
+ collection = handle_collection_only(
249
+ catalog_config, endpoint_config, collection_config, catalog
250
+ )
251
+ elif endpoint_config["Name"] == "Custom-Endpoint":
252
+ collection = handle_custom_endpoint(
336
253
  catalog_config,
337
- "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
338
- tmp_catalog,
339
- options,
254
+ endpoint_config,
255
+ collection_config,
256
+ catalog,
340
257
  )
341
- links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links() # type: ignore
342
- for link in links:
343
- # extract summary information
344
- if "city" in link.extra_fields:
345
- locations.append(link.extra_fields["city"])
346
- if "country" in link.extra_fields:
347
- if isinstance(link.extra_fields["country"], list):
348
- countries.extend(link.extra_fields["country"])
349
- else:
350
- countries.append(link.extra_fields["country"])
258
+ elif endpoint_config["Name"] in [
259
+ "COG source",
260
+ "GeoJSON source",
261
+ "FlatGeobuf source",
262
+ ]:
263
+ collection = handle_raw_source(
264
+ catalog_config, endpoint_config, collection_config, catalog
265
+ )
266
+ else:
267
+ raise ValueError("Type of Resource is not supported")
268
+ if collection:
269
+ add_single_item_if_collection_empty(collection)
270
+ add_projection_info(endpoint_config, collection)
271
+ add_to_catalog(collection, catalog, endpoint_config, collection_config, disable)
272
+ else:
273
+ raise Exception(f"No collection was generated for resource {endpoint_config}")
274
+ except Exception as e:
275
+ LOGGER.warn(f"""Exception: {e.args[0]} with config: {endpoint_config}""")
276
+ raise e
351
277
 
352
- parent_collection.add_links(links)
278
+ elif "Subcollections" in collection_config:
279
+ # if no endpoint is specified we check for definition of subcollections
280
+ parent_collection = get_or_create_collection(
281
+ catalog, collection_config["Name"], collection_config, catalog_config, {}
282
+ )
353
283
 
354
- add_collection_information(catalog_config, parent_collection, collection_config)
355
- add_process_info(catalog_config, parent_collection, collection_config)
356
- parent_collection.update_extent_from_items()
357
- # Add bbox extents from children
358
- for c_child in parent_collection.get_children():
359
- if isinstance(c_child, Collection):
360
- parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
361
- # Fill summaries for locations
362
- parent_collection.summaries = Summaries(
363
- {
364
- "cities": list(set(locations)),
365
- "countries": list(set(countries)),
366
- }
367
- )
368
- add_to_catalog(parent_collection, catalog, {}, collection_config)
284
+ locations = []
285
+ countries = []
286
+ for sub_coll_def in collection_config["Subcollections"]:
287
+ # Subcollection has only data on one location which
288
+ # is defined for the entire collection
289
+ if "Name" in sub_coll_def and "Point" in sub_coll_def:
290
+ locations.append(sub_coll_def["Name"])
291
+ if isinstance(sub_coll_def["Country"], list):
292
+ countries.extend(sub_coll_def["Country"])
293
+ else:
294
+ countries.append(sub_coll_def["Country"])
295
+ process_collection_file(
296
+ catalog_config,
297
+ "{}/{}".format(options.collectionspath, sub_coll_def["Collection"]),
298
+ parent_collection,
299
+ options,
300
+ )
301
+ # find link in parent collection to update metadata
302
+ for link in parent_collection.links:
303
+ if (
304
+ link.rel == "child"
305
+ and "id" in link.extra_fields
306
+ and link.extra_fields["id"] == sub_coll_def["Identifier"]
307
+ ):
308
+ latlng = "{},{}".format(
309
+ sub_coll_def["Point"][1],
310
+ sub_coll_def["Point"][0],
311
+ )
312
+ link.extra_fields["id"] = sub_coll_def["Identifier"]
313
+ link.extra_fields["latlng"] = latlng
314
+ link.extra_fields["name"] = sub_coll_def["Name"]
315
+ # Update title of collection to use location name
316
+ sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
317
+ if sub_collection:
318
+ sub_collection.title = sub_coll_def["Name"]
319
+ # The subcollection has multiple locations which need to be extracted
320
+ # and elevated to parent collection level
321
+ else:
322
+ # create temp catalog to save collection
323
+ tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
324
+ process_collection_file(
325
+ catalog_config,
326
+ "{}/{}".format(options.collectionspath, sub_coll_def["Collection"]),
327
+ tmp_catalog,
328
+ options,
329
+ )
330
+ links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links() # type: ignore
331
+ for link in links:
332
+ # extract summary information
333
+ if "city" in link.extra_fields:
334
+ locations.append(link.extra_fields["city"])
335
+ if "country" in link.extra_fields:
336
+ if isinstance(link.extra_fields["country"], list):
337
+ countries.extend(link.extra_fields["country"])
338
+ else:
339
+ countries.append(link.extra_fields["country"])
340
+
341
+ parent_collection.add_links(links)
342
+
343
+ add_collection_information(catalog_config, parent_collection, collection_config)
344
+ add_process_info(parent_collection, catalog_config, collection_config)
345
+ parent_collection.update_extent_from_items()
346
+ # Add bbox extents from children
347
+ for c_child in parent_collection.get_children():
348
+ if isinstance(c_child, Collection):
349
+ parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
350
+ # Fill summaries for locations
351
+ parent_collection.summaries = Summaries(
352
+ {
353
+ "cities": list(set(locations)),
354
+ "countries": list(set(countries)),
355
+ }
356
+ )
357
+ add_to_catalog(parent_collection, catalog, {}, collection_config)
369
358
 
370
359
 
371
360
  def add_to_catalog(
@@ -451,6 +440,11 @@ def add_to_catalog(
451
440
  is_flag=True,
452
441
  help="generate additionally thumbnail image for supported collections",
453
442
  )
443
+ @click.option(
444
+ "-gp",
445
+ is_flag=True,
446
+ help="generates the items in .parquet format",
447
+ )
454
448
  @click.argument(
455
449
  "collections",
456
450
  nargs=-1,
@@ -464,6 +458,7 @@ def process_catalogs(
464
458
  vd,
465
459
  ni,
466
460
  tn,
461
+ gp,
467
462
  collections,
468
463
  ):
469
464
  """STAC generator and harvester:
@@ -478,6 +473,7 @@ def process_catalogs(
478
473
  ni=ni,
479
474
  tn=tn,
480
475
  collections=collections,
476
+ gp=gp,
481
477
  )
482
478
  tasks = []
483
479
  for file_name in os.listdir(catalogspath):
@@ -2,7 +2,6 @@ from datetime import datetime
2
2
 
3
3
  import requests
4
4
  import spdx_lookup as lookup
5
- import yaml
6
5
  from pystac import (
7
6
  Asset,
8
7
  Catalog,
@@ -15,12 +14,12 @@ from pystac import (
15
14
  TemporalExtent,
16
15
  )
17
16
  from structlog import get_logger
18
- from yaml.loader import SafeLoader
19
17
 
20
18
  from eodash_catalog.utils import (
21
19
  generateDatetimesFromInterval,
22
20
  get_full_url,
23
21
  parse_datestring_to_tz_aware_datetime,
22
+ read_config_file,
24
23
  )
25
24
 
26
25
  LOGGER = get_logger(__name__)
@@ -214,7 +213,10 @@ def add_example_info(
214
213
 
215
214
 
216
215
  def add_collection_information(
217
- catalog_config: dict, collection: Collection, collection_config: dict
216
+ catalog_config: dict,
217
+ collection: Collection,
218
+ collection_config: dict,
219
+ is_root_collection: bool = False,
218
220
  ) -> None:
219
221
  # Add metadata information
220
222
  # Check license identifier
@@ -323,7 +325,7 @@ def add_collection_information(
323
325
  f'{catalog_config["assets_endpoint"]}/' f'{collection_config["Image"]}'
324
326
  )
325
327
  # Add extra fields to collection if available
326
- add_extra_fields(collection, collection_config)
328
+ add_extra_fields(collection, collection_config, is_root_collection)
327
329
 
328
330
  if "References" in collection_config:
329
331
  generic_counter = 1
@@ -347,7 +349,29 @@ def add_collection_information(
347
349
 
348
350
 
349
351
  def add_process_info(collection: Collection, catalog_config: dict, collection_config: dict) -> None:
350
- if "Process" in collection_config:
352
+ if any(key in collection_config for key in ["Locations", "Subcollections"]):
353
+ # add the generic geodb-like selection process on the root collection instead of Processes
354
+ if "geodb_default_form" in catalog_config:
355
+ # adding default geodb-like map handling for Locations
356
+ collection.extra_fields["eodash:jsonform"] = get_full_url(
357
+ catalog_config["geodb_default_form"], catalog_config
358
+ )
359
+ # link a process definition for getting a collection with {{feature}} placeholder
360
+ sl = Link(
361
+ rel="service",
362
+ target="./" + collection.id + "/{{feature}}/collection.json",
363
+ media_type="application/json; profile=collection",
364
+ extra_fields={
365
+ "id": "locations",
366
+ "method": "GET",
367
+ "type": "application/json; profile=collection",
368
+ "endpoint": "STAC",
369
+ },
370
+ )
371
+ collection.add_link(sl)
372
+ # elif is intentional for cases when Process is defined on collection with Locations
373
+ # then we want to only add it to the "children", not the root
374
+ elif "Process" in collection_config:
351
375
  if "EndPoints" in collection_config["Process"]:
352
376
  for endpoint in collection_config["Process"]["EndPoints"]:
353
377
  collection.add_link(create_service_link(endpoint, catalog_config))
@@ -390,6 +414,24 @@ def add_process_info(collection: Collection, catalog_config: dict, collection_co
390
414
  )
391
415
 
392
416
 
def add_process_info_child_collection(
    collection: Collection, catalog_config: dict, collection_config: dict
) -> None:
    """Attach the configured process definition to a child collection.

    For collections that are split into locations, the process itself is added
    to the child collections.  Nothing happens when ``collection_config`` has
    no ``"Process"`` entry.

    Args:
        collection: Child collection that receives the links and extra fields.
        catalog_config: Catalog configuration, passed through to
            ``create_service_link`` and ``get_full_url``.
        collection_config: Collection configuration, optionally holding a
            ``"Process"`` mapping with ``"EndPoints"``, ``"JsonForm"`` and
            ``"VegaDefinition"`` entries.
    """
    if "Process" not in collection_config:
        return
    process_config = collection_config["Process"]

    # one service link per configured endpoint
    if "EndPoints" in process_config:
        for endpoint_config in process_config["EndPoints"]:
            service_link = create_service_link(endpoint_config, catalog_config)
            collection.add_link(service_link)

    # form and chart definitions are stored as full URLs in the extra fields
    url_fields = (
        ("JsonForm", "eodash:jsonform"),
        ("VegaDefinition", "eodash:vegadefinition"),
    )
    for config_key, field_name in url_fields:
        if config_key in process_config:
            collection.extra_fields[field_name] = get_full_url(
                process_config[config_key], catalog_config
            )
433
+
434
+
393
435
  def add_base_overlay_info(
394
436
  collection: Collection, catalog_config: dict, collection_config: dict
395
437
  ) -> None:
@@ -399,28 +441,30 @@ def add_base_overlay_info(
399
441
  collection.add_link(create_web_map_link(layer, role="baselayer"))
400
442
  # alternatively use default base layers defined
401
443
  elif "default_base_layers" in catalog_config:
402
- with open(f'{catalog_config["default_base_layers"]}.yaml') as f:
403
- base_layers = yaml.load(f, Loader=SafeLoader)
404
- for layer in base_layers:
405
- collection.add_link(create_web_map_link(layer, role="baselayer"))
444
+ base_layers = read_config_file(catalog_config["default_base_layers"])
445
+ for layer in base_layers:
446
+ collection.add_link(create_web_map_link(layer, role="baselayer"))
406
447
  # add custom overlays just for this indicator
407
448
  if "OverlayLayers" in collection_config:
408
449
  for layer in collection_config["OverlayLayers"]:
409
450
  collection.add_link(create_web_map_link(layer, role="overlay"))
410
451
  # check if default overlay layers defined
411
452
  elif "default_overlay_layers" in catalog_config:
412
- with open("{}.yaml".format(catalog_config["default_overlay_layers"])) as f:
413
- overlay_layers = yaml.load(f, Loader=SafeLoader)
414
- for layer in overlay_layers:
415
- collection.add_link(create_web_map_link(layer, role="overlay"))
453
+ overlay_layers = read_config_file(catalog_config["default_overlay_layers"])
454
+ for layer in overlay_layers:
455
+ collection.add_link(create_web_map_link(layer, role="overlay"))
416
456
 
417
457
 
418
- def add_extra_fields(stac_object: Collection | Link, collection_config: dict) -> None:
458
+ def add_extra_fields(
459
+ stac_object: Collection | Link, collection_config: dict, is_root_collection: bool = False
460
+ ) -> None:
419
461
  if "yAxis" in collection_config:
420
462
  stac_object.extra_fields["yAxis"] = collection_config["yAxis"]
421
463
  if "Themes" in collection_config:
422
464
  stac_object.extra_fields["themes"] = collection_config["Themes"]
423
- if "Locations" in collection_config or "Subcollections" in collection_config:
465
+ if (
466
+ "Locations" in collection_config or "Subcollections" in collection_config
467
+ ) and is_root_collection:
424
468
  stac_object.extra_fields["locations"] = True
425
469
  if "Tags" in collection_config:
426
470
  stac_object.extra_fields["tags"] = collection_config["Tags"]
eodash_catalog/utils.py CHANGED
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import os
2
3
  import re
3
4
  import threading
@@ -10,11 +11,13 @@ from decimal import Decimal
10
11
  from functools import reduce, wraps
11
12
  from typing import Any
12
13
 
14
+ import stac_geoparquet as stacgp
15
+ import yaml
13
16
  from dateutil import parser
14
17
  from owslib.wcs import WebCoverageService
15
18
  from owslib.wms import WebMapService
16
19
  from owslib.wmts import WebMapTileService
17
- from pystac import Catalog, Collection, Item, RelType
20
+ from pystac import Asset, Catalog, Collection, Item, Link, RelType
18
21
  from pytz import timezone as pytztimezone
19
22
  from six import string_types
20
23
  from structlog import get_logger
@@ -233,14 +236,47 @@ class RaisingThread(threading.Thread):
233
236
  raise self._exc
234
237
 
235
238
 
236
- def recursive_save(stac_object: Catalog, no_items: bool = False) -> None:
237
- stac_object.save_object()
239
+ def recursive_save(stac_object: Catalog, no_items: bool = False, geo_parquet: bool = False) -> None:
238
240
  for child in stac_object.get_children():
239
- recursive_save(child, no_items)
241
+ recursive_save(child, no_items, geo_parquet)
240
242
  if not no_items:
241
- # try to save items if available
242
- for item in stac_object.get_items():
243
- item.save_object()
243
+ if geo_parquet:
244
+ create_geoparquet_items(stac_object)
245
+ else:
246
+ for item in stac_object.get_items():
247
+ item.save_object()
248
+ stac_object.save_object()
249
+
250
+
def create_geoparquet_items(stacObject: Catalog):
    """Write the items of ``stacObject`` to one GeoParquet file and link it.

    When the object has at least one item, all items are written to an
    ``items.parquet`` file in the directory of the object's own href, the
    individual ``item`` links are removed and a single ``items`` link pointing
    at the parquet file is added.  Objects without items are left untouched.

    Args:
        stacObject: STAC object whose items are exported.
    """
    if not iter_len_at_least(stacObject.get_items(), 1):
        return

    # directory of the object's own file: its href without the last segment
    href_segments = stacObject.self_href.split("/")
    parquet_path = "/".join(href_segments[:-1]) + "/items.parquet"
    to_stac_geoparquet(stacObject, parquet_path)

    parquet_link = Link(
        rel="items",
        target=parquet_path,
        media_type="application/vnd.apache.parquet",
        title="GeoParquet Items",
    )
    # swap the per-item links for the single link to the parquet file
    stacObject.clear_links(rel="item")
    stacObject.add_links([parquet_link])
266
+
267
+
def to_stac_geoparquet(stacObject: Catalog, path: str):
    """Serialize all items of ``stacObject`` into a GeoParquet file at ``path``.

    Items without a geometry get a point created by
    ``create_geojson_point(0, 0)`` and items without assets get a placeholder
    asset with an empty href before conversion -- presumably because the
    converter requires both fields; confirm against stac-geoparquet.  These
    placeholders are set on the item objects themselves.

    Args:
        stacObject: STAC object whose items are written.
        path: Target file path; its parent directory is created if missing.
    """
    items = []
    for item in stacObject.get_items():
        if not item.geometry:
            item.geometry = create_geojson_point(0, 0)["geometry"]
        if not item.assets:
            item.assets = {"dummy_asset": Asset(href="")}
        items.append(item.to_dict())
    record_batch_reader = stacgp.arrow.parse_stac_items_to_arrow(items)
    table = record_batch_reader.read_all()
    # a bare file name has an empty dirname, which os.makedirs rejects
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    stacgp.arrow.to_parquet(table, path)
244
280
 
245
281
 
246
282
  def iter_len_at_least(i, n: int) -> int:
@@ -289,6 +325,7 @@ class Options:
289
325
  vd: bool
290
326
  ni: bool
291
327
  tn: bool
328
+ gp: bool
292
329
  collections: list[str]
293
330
 
294
331
 
@@ -392,3 +429,32 @@ def get_full_url(url: str, catalog_config) -> str:
392
429
  return url
393
430
  else:
394
431
  return f'{catalog_config["assets_endpoint"]}{url}'
432
+
433
+
def read_config_file(path: str) -> Any:
    """Load a JSON or YAML configuration file given with or without suffix.

    The path is tried as given first and then with each supported suffix
    appended; the first existing regular file is parsed by ``_load_file``.

    Args:
        path: Path to the configuration file, optionally without extension.

    Returns:
        The parsed file content (for example a dict, or a list of layer
        definitions).

    Raises:
        FileNotFoundError: If neither ``path`` nor ``path`` plus a supported
            suffix is an existing file.
    """
    for suffix in ("", ".json", ".yaml", ".yml", ".JSON", ".YAML", ".YML"):
        candidate = path + suffix
        # isfile instead of exists: a directory that shares the suffix-less
        # name must not shadow the actual "<path>.yaml" / "<path>.json" file
        if os.path.isfile(candidate):
            return _load_file(candidate)

    raise FileNotFoundError(
        f"No file found for '{path}' with or without supported suffixes (.json/.yaml/.yml)"
    )
448
+
449
+
def _load_file(filepath: str) -> Any:
    """Parse a configuration file as JSON, falling back to YAML.

    Args:
        filepath: Path of an existing file to read.

    Returns:
        The parsed content of the file.

    Raises:
        ValueError: If the content is neither valid JSON nor valid YAML.
    """
    # read explicitly as UTF-8 so parsing does not depend on the platform locale
    with open(filepath, encoding="utf-8") as file:
        content = file.read()
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # not JSON; fall through to the YAML parser
        pass
    try:
        return yaml.safe_load(content)
    except yaml.YAMLError as err:
        raise ValueError(f"Failed to parse '{filepath}' as JSON or YAML: {err}") from err
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eodash_catalog
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: This package is intended to help create a compatible STAC catalog for the eodash dashboard client. It supports configuration of multiple endpoint types for information extraction.
5
5
  Project-URL: Documentation, https://github.com/eodash/eodash_catalog#readme
6
6
  Project-URL: Issues, https://github.com/eodash/eodash_catalog/issues
@@ -33,6 +33,7 @@ Requires-Dist: requests-oauthlib<1.3.2
33
33
  Requires-Dist: requests<3
34
34
  Requires-Dist: setuptools<71
35
35
  Requires-Dist: spdx-lookup<=0.3.3
36
+ Requires-Dist: stac-geoparquet<=0.6.0
36
37
  Requires-Dist: structlog<22.0
37
38
  Requires-Dist: swiftspec==0.0.2
38
39
  Provides-Extra: dev
@@ -0,0 +1,14 @@
1
+ eodash_catalog/__about__.py,sha256=tt63_uEHezTdcfiE37lvE0cZyuSRlO26qsS2Zzcj3-4,138
2
+ eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
3
+ eodash_catalog/duration.py,sha256=B6XOZfvNU7SuqpxuVtT1kNKODoOQJXDI6mocvA_U1ik,10816
4
+ eodash_catalog/endpoints.py,sha256=fQ3kaDCcdPypxrZzvAtWYqSt5nVq8cbCDOfH6BlbIc8,41835
5
+ eodash_catalog/generate_indicators.py,sha256=V2a6r3lS_BoZvwoywHFaT-gBu3INRp4E4zLZioD6ang,19493
6
+ eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
7
+ eodash_catalog/stac_handling.py,sha256=TuwJ6AOA-1aJglAFtWZmtimZO5_wvIkpU64XHcKI7rk,23254
8
+ eodash_catalog/thumbnails.py,sha256=qZDcpQe80ki6lEMKYdZtSnnHH0PUpcoXTvU9bYdPlzU,2260
9
+ eodash_catalog/utils.py,sha256=zZ4PT1h1lWCMvEIfg5Xlw4re86pwR40BX8jEDweiTJ0,16115
10
+ eodash_catalog-0.1.10.dist-info/METADATA,sha256=40cVmtNiUbJbofbuUDh1-I4rLThX7qO_D7BqW3STcnE,3234
11
+ eodash_catalog-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ eodash_catalog-0.1.10.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
13
+ eodash_catalog-0.1.10.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
14
+ eodash_catalog-0.1.10.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- eodash_catalog/__about__.py,sha256=Suv8M7NmJ_IR_KeM80feexn6iHCp66qCdMfm0fV59ac,137
2
- eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
3
- eodash_catalog/duration.py,sha256=B6XOZfvNU7SuqpxuVtT1kNKODoOQJXDI6mocvA_U1ik,10816
4
- eodash_catalog/endpoints.py,sha256=b0vIQOLGqANm7qaaAap0IeW11yPzjo6-VXqcKCY41ag,41543
5
- eodash_catalog/generate_indicators.py,sha256=jqPns3KOZA8GEcxGYD6NcYa5iGtjKcrKkW7LLv13PRQ,20634
6
- eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
7
- eodash_catalog/stac_handling.py,sha256=8_W45SIXV4jAUa9J-f2KZ2eor6MNfPfy0n1d-932eAE,21234
8
- eodash_catalog/thumbnails.py,sha256=qZDcpQe80ki6lEMKYdZtSnnHH0PUpcoXTvU9bYdPlzU,2260
9
- eodash_catalog/utils.py,sha256=QhZeMXRC1uvE3VzC3T_nMi9hn_3RR05s5rkzs8tCeQc,13866
10
- eodash_catalog-0.1.8.dist-info/METADATA,sha256=7ETkKNNhQX2F6eH6scnpd0gBfS-VWUBkG7piWHQfmJU,3195
11
- eodash_catalog-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- eodash_catalog-0.1.8.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
13
- eodash_catalog-0.1.8.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
14
- eodash_catalog-0.1.8.dist-info/RECORD,,