eodash_catalog 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eodash_catalog might be problematic. Click here for more details.

eodash_catalog/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2024-present Daniel Santillan <daniel.santillan@eox.at>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.1.7"
4
+ __version__ = "0.1.9"
eodash_catalog/endpoints.py CHANGED
@@ -528,7 +528,7 @@ def handle_GeoDB_endpoint(
528
528
  id=IdValue,
529
529
  bbox=bbox,
530
530
  properties={},
531
- geometry=create_geojson_point(lon, lat),
531
+ geometry=create_geojson_point(lon, lat)["geometry"],
532
532
  datetime=None,
533
533
  start_datetime=min_date,
534
534
  end_datetime=max_date,
@@ -825,7 +825,7 @@ def add_visualization_info(
825
825
  data_projection = str(endpoint_config.get("DataProjection", 3857))
826
826
  epsg_prefix = "" if "EPSG:" in data_projection else "EPSG:"
827
827
  crs = f"{epsg_prefix}{data_projection}"
828
- time = stac_object.get_datetime() if isinstance(stac_object, Item) else "{{time}}"
828
+ time = stac_object.get_datetime() if isinstance(stac_object, Item) else "{time}"
829
829
  target_url = (
830
830
  "{}/tiles/{}/{}/{{z}}/{{y}}/{{x}}" "?crs={}&time={}&vmin={}&vmax={}&cbar={}"
831
831
  ).format(
@@ -990,7 +990,7 @@ def handle_raw_source(
990
990
  id=format_datetime_to_isostring_zulu(dt),
991
991
  bbox=bbox,
992
992
  properties={},
993
- geometry=create_geojson_from_bbox(bbox),
993
+ geometry=create_geojson_from_bbox(bbox)["features"][0]["geometry"],
994
994
  datetime=dt,
995
995
  assets=assets,
996
996
  extra_fields={},
eodash_catalog/generate_indicators.py CHANGED
@@ -9,13 +9,11 @@ import time
9
9
  from typing import Any
10
10
 
11
11
  import click
12
- import yaml
13
12
  from dotenv import load_dotenv
14
13
  from pystac import Catalog, CatalogType, Collection, Link, Summaries
15
14
  from pystac.layout import TemplateLayoutStrategy
16
15
  from pystac.validation import validate_all
17
16
  from structlog import get_logger
18
- from yaml.loader import SafeLoader
19
17
 
20
18
  from eodash_catalog.endpoints import (
21
19
  handle_collection_only,
@@ -42,6 +40,7 @@ from eodash_catalog.utils import (
42
40
  RaisingThread,
43
41
  add_single_item_if_collection_empty,
44
42
  iter_len_at_least,
43
+ read_config_file,
45
44
  recursive_save,
46
45
  retry,
47
46
  )
@@ -53,76 +52,72 @@ LOGGER = get_logger(__name__)
53
52
 
54
53
  def process_catalog_file(file_path: str, options: Options):
55
54
  LOGGER.info(f"Processing catalog: {file_path}")
56
- with open(file_path) as f:
57
- catalog_config: dict = yaml.load(f, Loader=SafeLoader)
58
-
59
- if len(options.collections) > 0:
60
- # create only catalogs containing the passed collections
61
- process_collections = [
62
- c for c in catalog_config["collections"] if c in options.collections
63
- ]
64
- elif (len(options.collections) == 1 and options.collections == "all") or len(
65
- options.collections
66
- ) == 0:
67
- # create full catalog
68
- process_collections = catalog_config["collections"]
69
- if len(process_collections) == 0:
70
- LOGGER.info("No applicable collections found for catalog, skipping creation")
71
- return
72
- catalog = Catalog(
73
- id=catalog_config["id"],
74
- description=catalog_config["description"],
75
- title=catalog_config["title"],
76
- catalog_type=CatalogType.RELATIVE_PUBLISHED,
77
- )
78
- for collection in process_collections:
79
- file_path = f"{options.collectionspath}/{collection}.yaml"
80
- if os.path.isfile(file_path):
81
- # if collection file exists process it as indicator
82
- # collection will be added as single collection to indicator
83
- process_indicator_file(catalog_config, file_path, catalog, options)
84
- else:
85
- # if not try to see if indicator definition available
86
- file_path = f"{options.indicatorspath}/{collection}.yaml"
87
- if os.path.isfile(file_path):
88
- process_indicator_file(
89
- catalog_config,
90
- f"{options.indicatorspath}/{collection}.yaml",
91
- catalog,
92
- options,
93
- )
94
- else:
95
- LOGGER.info(f"Warning: neither collection nor indicator found for {collection}")
96
- if "MapProjection" in catalog_config:
97
- catalog.extra_fields["eodash:mapProjection"] = catalog_config["MapProjection"]
55
+ catalog_config: dict = read_config_file(file_path)
56
+ if len(options.collections) > 0:
57
+ # create only catalogs containing the passed collections
58
+ process_collections = [c for c in catalog_config["collections"] if c in options.collections]
59
+ elif (len(options.collections) == 1 and options.collections == "all") or len(
60
+ options.collections
61
+ ) == 0:
62
+ # create full catalog
63
+ process_collections = catalog_config["collections"]
64
+ if len(process_collections) == 0:
65
+ LOGGER.info("No applicable collections found for catalog, skipping creation")
66
+ return
67
+ catalog = Catalog(
68
+ id=catalog_config["id"],
69
+ description=catalog_config["description"],
70
+ title=catalog_config["title"],
71
+ catalog_type=CatalogType.RELATIVE_PUBLISHED,
72
+ )
73
+ for collection in process_collections:
74
+ file_path = f"{options.collectionspath}/{collection}"
75
+ try:
76
+ # if collection file exists process it as indicator
77
+ # collection will be added as single collection to indicator
78
+ process_indicator_file(catalog_config, file_path, catalog, options)
79
+ except FileNotFoundError:
80
+ # if not exists try to see if indicator definition available
81
+ file_path_indicator = f"{options.indicatorspath}/{collection}"
82
+ try:
83
+ process_indicator_file(
84
+ catalog_config,
85
+ file_path_indicator,
86
+ catalog,
87
+ options,
88
+ )
89
+ except FileNotFoundError:
90
+ LOGGER.info(f"Warning: neither collection nor indicator found for {collection}")
91
+ if "MapProjection" in catalog_config:
92
+ catalog.extra_fields["eodash:mapProjection"] = catalog_config["MapProjection"]
98
93
 
99
- strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
100
- # expecting that the catalog will be hosted online, self url should correspond to that
101
- # default to a local folder + catalog id in case not set
94
+ strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
95
+ # expecting that the catalog will be hosted online, self url should correspond to that
96
+ # default to a local folder + catalog id in case not set
102
97
 
103
- LOGGER.info("Started creation of collection files")
104
- start = time.time()
105
- if options.ni:
106
- catalog_self_href = f'{options.outputpath}/{catalog_config["id"]}'
107
- catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
108
- recursive_save(catalog, options.ni)
109
- else:
110
- # For full catalog save with items this still seems to be faster
111
- catalog_self_href = catalog_config.get(
112
- "endpoint", "{}/{}".format(options.outputpath, catalog_config["id"])
113
- )
114
- catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
115
- catalog.save(dest_href="{}/{}".format(options.outputpath, catalog_config["id"]))
116
- end = time.time()
117
- LOGGER.info(f"Catalog {catalog_config['id']}: Time consumed in saving: {end - start}")
98
+ LOGGER.info("Started creation of collection files")
99
+ start = time.time()
100
+ if options.ni or options.gp:
101
+ catalog_self_href = f'{options.outputpath}/{catalog_config["id"]}'
102
+ catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
103
+ recursive_save(catalog, options.ni, options.gp)
104
+ else:
105
+ # For full catalog save with items this still seems to be faster
106
+ catalog_self_href = catalog_config.get(
107
+ "endpoint", "{}/{}".format(options.outputpath, catalog_config["id"])
108
+ )
109
+ catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
110
+ catalog.save(dest_href="{}/{}".format(options.outputpath, catalog_config["id"]))
111
+ end = time.time()
112
+ LOGGER.info(f"Catalog {catalog_config['id']}: Time consumed in saving: {end - start}")
118
113
 
119
- if options.vd:
120
- # try to validate catalog if flag was set
121
- LOGGER.info(f"Running validation of catalog {file_path}")
122
- try:
123
- validate_all(catalog.to_dict(), href=catalog_config["endpoint"])
124
- except Exception as e:
125
- LOGGER.info(f"Issue validation collection: {e}")
114
+ if options.vd:
115
+ # try to validate catalog if flag was set
116
+ LOGGER.info(f"Running validation of catalog {file_path}")
117
+ try:
118
+ validate_all(catalog.to_dict(), href=catalog_config["endpoint"])
119
+ except Exception as e:
120
+ LOGGER.info(f"Issue validation collection: {e}")
126
121
 
127
122
 
128
123
  def extract_indicator_info(parent_collection: Collection):
@@ -166,37 +161,36 @@ def extract_indicator_info(parent_collection: Collection):
166
161
  def process_indicator_file(
167
162
  catalog_config: dict, file_path: str, catalog: Catalog, options: Options
168
163
  ):
169
- with open(file_path) as f:
170
- LOGGER.info(f"Processing indicator: {file_path}")
171
- indicator_config: dict = yaml.load(f, Loader=SafeLoader)
172
- parent_indicator = get_or_create_collection(
173
- catalog, indicator_config["Name"], indicator_config, catalog_config, {}
174
- )
175
- if "Collections" in indicator_config:
176
- for collection in indicator_config["Collections"]:
177
- process_collection_file(
178
- catalog_config,
179
- f"{options.collectionspath}/{collection}.yaml",
180
- parent_indicator,
181
- options,
182
- "Disable" in indicator_config and collection in indicator_config["Disable"],
183
- )
184
- else:
185
- # we assume that collection files can also be loaded directly
186
- process_collection_file(catalog_config, file_path, parent_indicator, options)
187
- add_collection_information(catalog_config, parent_indicator, indicator_config)
188
- if iter_len_at_least(parent_indicator.get_items(recursive=True), 1):
189
- parent_indicator.update_extent_from_items()
190
- # Add bbox extents from children
191
- for c_child in parent_indicator.get_children():
192
- if isinstance(c_child, Collection): # typing reason
193
- parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
194
- # extract collection information and add it to summary indicator level
195
- extract_indicator_info(parent_indicator)
196
- add_process_info(parent_indicator, catalog_config, indicator_config)
197
- # add baselayer and overview information to indicator collection
198
- add_base_overlay_info(parent_indicator, catalog_config, indicator_config)
199
- add_to_catalog(parent_indicator, catalog, {}, indicator_config)
164
+ LOGGER.info(f"Processing indicator: {file_path}")
165
+ indicator_config = read_config_file(file_path)
166
+ parent_indicator = get_or_create_collection(
167
+ catalog, indicator_config["Name"], indicator_config, catalog_config, {}
168
+ )
169
+ if "Collections" in indicator_config:
170
+ for collection in indicator_config["Collections"]:
171
+ process_collection_file(
172
+ catalog_config,
173
+ f"{options.collectionspath}/{collection}",
174
+ parent_indicator,
175
+ options,
176
+ "Disable" in indicator_config and collection in indicator_config["Disable"],
177
+ )
178
+ else:
179
+ # we assume that collection files can also be loaded directly
180
+ process_collection_file(catalog_config, file_path, parent_indicator, options)
181
+ add_collection_information(catalog_config, parent_indicator, indicator_config)
182
+ if iter_len_at_least(parent_indicator.get_items(recursive=True), 1):
183
+ parent_indicator.update_extent_from_items()
184
+ # Add bbox extents from children
185
+ for c_child in parent_indicator.get_children():
186
+ if isinstance(c_child, Collection): # typing reason
187
+ parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
188
+ # extract collection information and add it to summary indicator level
189
+ extract_indicator_info(parent_indicator)
190
+ add_process_info(parent_indicator, catalog_config, indicator_config)
191
+ # add baselayer and overview information to indicator collection
192
+ add_base_overlay_info(parent_indicator, catalog_config, indicator_config)
193
+ add_to_catalog(parent_indicator, catalog, {}, indicator_config)
200
194
 
201
195
 
202
196
  @retry((Exception), tries=3, delay=5, backoff=2, logger=LOGGER)
@@ -208,164 +202,159 @@ def process_collection_file(
208
202
  disable=False,
209
203
  ):
210
204
  LOGGER.info(f"Processing collection: {file_path}")
211
- with open(file_path) as f:
212
- collection_config: dict = yaml.load(f, Loader=SafeLoader)
213
- if "Resources" in collection_config:
214
- for endpoint_config in collection_config["Resources"]:
215
- try:
216
- collection = None
217
- if endpoint_config["Name"] == "Sentinel Hub":
218
- collection = handle_SH_endpoint(
219
- catalog_config, endpoint_config, collection_config, catalog, options
220
- )
221
- elif endpoint_config["Name"] == "Sentinel Hub WMS":
222
- collection = handle_SH_WMS_endpoint(
223
- catalog_config, endpoint_config, collection_config, catalog
224
- )
225
- elif endpoint_config["Name"] == "GeoDB":
226
- collection = handle_GeoDB_endpoint(
227
- catalog_config, endpoint_config, collection_config, catalog
228
- )
229
- elif endpoint_config["Name"] == "VEDA":
230
- collection = handle_VEDA_endpoint(
231
- catalog_config, endpoint_config, collection_config, catalog, options
232
- )
233
- elif endpoint_config["Name"] == "marinedatastore":
234
- collection = handle_WMS_endpoint(
235
- catalog_config, endpoint_config, collection_config, catalog, wmts=True
236
- )
237
- elif endpoint_config["Name"] == "xcube":
238
- collection = handle_xcube_endpoint(
239
- catalog_config, endpoint_config, collection_config, catalog
240
- )
241
- elif endpoint_config["Name"] == "rasdaman":
242
- collection = handle_rasdaman_endpoint(
243
- catalog_config, endpoint_config, collection_config, catalog
244
- )
245
- elif endpoint_config["Name"] == "WMS":
246
- collection = handle_WMS_endpoint(
247
- catalog_config, endpoint_config, collection_config, catalog
248
- )
249
- elif endpoint_config["Name"] == "JAXA_WMTS_PALSAR":
250
- # somewhat one off creation of individual WMTS layers as individual items
251
- collection = handle_WMS_endpoint(
252
- catalog_config, endpoint_config, collection_config, catalog, wmts=True
253
- )
254
- elif endpoint_config["Name"] == "Collection-only":
255
- collection = handle_collection_only(
256
- catalog_config, endpoint_config, collection_config, catalog
257
- )
258
- elif endpoint_config["Name"] == "Custom-Endpoint":
259
- collection = handle_custom_endpoint(
260
- catalog_config,
261
- endpoint_config,
262
- collection_config,
263
- catalog,
264
- )
265
- elif endpoint_config["Name"] in [
266
- "COG source",
267
- "GeoJSON source",
268
- "FlatGeobuf source",
269
- ]:
270
- collection = handle_raw_source(
271
- catalog_config, endpoint_config, collection_config, catalog
272
- )
273
- else:
274
- raise ValueError("Type of Resource is not supported")
275
- if collection:
276
- add_single_item_if_collection_empty(collection)
277
- add_projection_info(endpoint_config, collection)
278
- add_to_catalog(
279
- collection, catalog, endpoint_config, collection_config, disable
280
- )
281
- else:
282
- raise Exception(
283
- f"No collection was generated for resource {endpoint_config}"
284
- )
285
- except Exception as e:
286
- LOGGER.warn(f"""Exception: {e.args[0]} with config: {endpoint_config}""")
287
- raise e
288
-
289
- elif "Subcollections" in collection_config:
290
- # if no endpoint is specified we check for definition of subcollections
291
- parent_collection = get_or_create_collection(
292
- catalog, collection_config["Name"], collection_config, catalog_config, {}
293
- )
294
-
295
- locations = []
296
- countries = []
297
- for sub_coll_def in collection_config["Subcollections"]:
298
- # Subcollection has only data on one location which
299
- # is defined for the entire collection
300
- if "Name" in sub_coll_def and "Point" in sub_coll_def:
301
- locations.append(sub_coll_def["Name"])
302
- if isinstance(sub_coll_def["Country"], list):
303
- countries.extend(sub_coll_def["Country"])
304
- else:
305
- countries.append(sub_coll_def["Country"])
306
- process_collection_file(
307
- catalog_config,
308
- "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
309
- parent_collection,
310
- options,
205
+ collection_config = read_config_file(file_path)
206
+ if "Resources" in collection_config:
207
+ for endpoint_config in collection_config["Resources"]:
208
+ try:
209
+ collection = None
210
+ if endpoint_config["Name"] == "Sentinel Hub":
211
+ collection = handle_SH_endpoint(
212
+ catalog_config, endpoint_config, collection_config, catalog, options
311
213
  )
312
- # find link in parent collection to update metadata
313
- for link in parent_collection.links:
314
- if (
315
- link.rel == "child"
316
- and "id" in link.extra_fields
317
- and link.extra_fields["id"] == sub_coll_def["Identifier"]
318
- ):
319
- latlng = "{},{}".format(
320
- sub_coll_def["Point"][1],
321
- sub_coll_def["Point"][0],
322
- )
323
- link.extra_fields["id"] = sub_coll_def["Identifier"]
324
- link.extra_fields["latlng"] = latlng
325
- link.extra_fields["name"] = sub_coll_def["Name"]
326
- # Update title of collection to use location name
327
- sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
328
- if sub_collection:
329
- sub_collection.title = sub_coll_def["Name"]
330
- # The subcollection has multiple locations which need to be extracted
331
- # and elevated to parent collection level
332
- else:
333
- # create temp catalog to save collection
334
- tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
335
- process_collection_file(
214
+ elif endpoint_config["Name"] == "Sentinel Hub WMS":
215
+ collection = handle_SH_WMS_endpoint(
216
+ catalog_config, endpoint_config, collection_config, catalog
217
+ )
218
+ elif endpoint_config["Name"] == "GeoDB":
219
+ collection = handle_GeoDB_endpoint(
220
+ catalog_config, endpoint_config, collection_config, catalog
221
+ )
222
+ elif endpoint_config["Name"] == "VEDA":
223
+ collection = handle_VEDA_endpoint(
224
+ catalog_config, endpoint_config, collection_config, catalog, options
225
+ )
226
+ elif endpoint_config["Name"] == "marinedatastore":
227
+ collection = handle_WMS_endpoint(
228
+ catalog_config, endpoint_config, collection_config, catalog, wmts=True
229
+ )
230
+ elif endpoint_config["Name"] == "xcube":
231
+ collection = handle_xcube_endpoint(
232
+ catalog_config, endpoint_config, collection_config, catalog
233
+ )
234
+ elif endpoint_config["Name"] == "rasdaman":
235
+ collection = handle_rasdaman_endpoint(
236
+ catalog_config, endpoint_config, collection_config, catalog
237
+ )
238
+ elif endpoint_config["Name"] == "WMS":
239
+ collection = handle_WMS_endpoint(
240
+ catalog_config, endpoint_config, collection_config, catalog
241
+ )
242
+ elif endpoint_config["Name"] == "JAXA_WMTS_PALSAR":
243
+ # somewhat one off creation of individual WMTS layers as individual items
244
+ collection = handle_WMS_endpoint(
245
+ catalog_config, endpoint_config, collection_config, catalog, wmts=True
246
+ )
247
+ elif endpoint_config["Name"] == "Collection-only":
248
+ collection = handle_collection_only(
249
+ catalog_config, endpoint_config, collection_config, catalog
250
+ )
251
+ elif endpoint_config["Name"] == "Custom-Endpoint":
252
+ collection = handle_custom_endpoint(
336
253
  catalog_config,
337
- "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
338
- tmp_catalog,
339
- options,
254
+ endpoint_config,
255
+ collection_config,
256
+ catalog,
340
257
  )
341
- links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links() # type: ignore
342
- for link in links:
343
- # extract summary information
344
- if "city" in link.extra_fields:
345
- locations.append(link.extra_fields["city"])
346
- if "country" in link.extra_fields:
347
- if isinstance(link.extra_fields["country"], list):
348
- countries.extend(link.extra_fields["country"])
349
- else:
350
- countries.append(link.extra_fields["country"])
258
+ elif endpoint_config["Name"] in [
259
+ "COG source",
260
+ "GeoJSON source",
261
+ "FlatGeobuf source",
262
+ ]:
263
+ collection = handle_raw_source(
264
+ catalog_config, endpoint_config, collection_config, catalog
265
+ )
266
+ else:
267
+ raise ValueError("Type of Resource is not supported")
268
+ if collection:
269
+ add_single_item_if_collection_empty(collection)
270
+ add_projection_info(endpoint_config, collection)
271
+ add_to_catalog(collection, catalog, endpoint_config, collection_config, disable)
272
+ else:
273
+ raise Exception(f"No collection was generated for resource {endpoint_config}")
274
+ except Exception as e:
275
+ LOGGER.warn(f"""Exception: {e.args[0]} with config: {endpoint_config}""")
276
+ raise e
351
277
 
352
- parent_collection.add_links(links)
278
+ elif "Subcollections" in collection_config:
279
+ # if no endpoint is specified we check for definition of subcollections
280
+ parent_collection = get_or_create_collection(
281
+ catalog, collection_config["Name"], collection_config, catalog_config, {}
282
+ )
353
283
 
354
- add_collection_information(catalog_config, parent_collection, collection_config)
355
- add_process_info(catalog_config, parent_collection, collection_config)
356
- parent_collection.update_extent_from_items()
357
- # Add bbox extents from children
358
- for c_child in parent_collection.get_children():
359
- if isinstance(c_child, Collection):
360
- parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
361
- # Fill summaries for locations
362
- parent_collection.summaries = Summaries(
363
- {
364
- "cities": list(set(locations)),
365
- "countries": list(set(countries)),
366
- }
367
- )
368
- add_to_catalog(parent_collection, catalog, {}, collection_config)
284
+ locations = []
285
+ countries = []
286
+ for sub_coll_def in collection_config["Subcollections"]:
287
+ # Subcollection has only data on one location which
288
+ # is defined for the entire collection
289
+ if "Name" in sub_coll_def and "Point" in sub_coll_def:
290
+ locations.append(sub_coll_def["Name"])
291
+ if isinstance(sub_coll_def["Country"], list):
292
+ countries.extend(sub_coll_def["Country"])
293
+ else:
294
+ countries.append(sub_coll_def["Country"])
295
+ process_collection_file(
296
+ catalog_config,
297
+ "{}/{}".format(options.collectionspath, sub_coll_def["Collection"]),
298
+ parent_collection,
299
+ options,
300
+ )
301
+ # find link in parent collection to update metadata
302
+ for link in parent_collection.links:
303
+ if (
304
+ link.rel == "child"
305
+ and "id" in link.extra_fields
306
+ and link.extra_fields["id"] == sub_coll_def["Identifier"]
307
+ ):
308
+ latlng = "{},{}".format(
309
+ sub_coll_def["Point"][1],
310
+ sub_coll_def["Point"][0],
311
+ )
312
+ link.extra_fields["id"] = sub_coll_def["Identifier"]
313
+ link.extra_fields["latlng"] = latlng
314
+ link.extra_fields["name"] = sub_coll_def["Name"]
315
+ # Update title of collection to use location name
316
+ sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
317
+ if sub_collection:
318
+ sub_collection.title = sub_coll_def["Name"]
319
+ # The subcollection has multiple locations which need to be extracted
320
+ # and elevated to parent collection level
321
+ else:
322
+ # create temp catalog to save collection
323
+ tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
324
+ process_collection_file(
325
+ catalog_config,
326
+ "{}/{}".format(options.collectionspath, sub_coll_def["Collection"]),
327
+ tmp_catalog,
328
+ options,
329
+ )
330
+ links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links() # type: ignore
331
+ for link in links:
332
+ # extract summary information
333
+ if "city" in link.extra_fields:
334
+ locations.append(link.extra_fields["city"])
335
+ if "country" in link.extra_fields:
336
+ if isinstance(link.extra_fields["country"], list):
337
+ countries.extend(link.extra_fields["country"])
338
+ else:
339
+ countries.append(link.extra_fields["country"])
340
+
341
+ parent_collection.add_links(links)
342
+
343
+ add_collection_information(catalog_config, parent_collection, collection_config)
344
+ add_process_info(parent_collection, catalog_config, collection_config)
345
+ parent_collection.update_extent_from_items()
346
+ # Add bbox extents from children
347
+ for c_child in parent_collection.get_children():
348
+ if isinstance(c_child, Collection):
349
+ parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
350
+ # Fill summaries for locations
351
+ parent_collection.summaries = Summaries(
352
+ {
353
+ "cities": list(set(locations)),
354
+ "countries": list(set(countries)),
355
+ }
356
+ )
357
+ add_to_catalog(parent_collection, catalog, {}, collection_config)
369
358
 
370
359
 
371
360
  def add_to_catalog(
@@ -451,6 +440,11 @@ def add_to_catalog(
451
440
  is_flag=True,
452
441
  help="generate additionally thumbnail image for supported collections",
453
442
  )
443
+ @click.option(
444
+ "-gp",
445
+ is_flag=True,
446
+ help="generates the items in .parquet format",
447
+ )
454
448
  @click.argument(
455
449
  "collections",
456
450
  nargs=-1,
@@ -464,6 +458,7 @@ def process_catalogs(
464
458
  vd,
465
459
  ni,
466
460
  tn,
461
+ gp,
467
462
  collections,
468
463
  ):
469
464
  """STAC generator and harvester:
@@ -478,6 +473,7 @@ def process_catalogs(
478
473
  ni=ni,
479
474
  tn=tn,
480
475
  collections=collections,
476
+ gp=gp,
481
477
  )
482
478
  tasks = []
483
479
  for file_name in os.listdir(catalogspath):
eodash_catalog/stac_handling.py CHANGED
@@ -2,7 +2,6 @@ from datetime import datetime
2
2
 
3
3
  import requests
4
4
  import spdx_lookup as lookup
5
- import yaml
6
5
  from pystac import (
7
6
  Asset,
8
7
  Catalog,
@@ -15,12 +14,12 @@ from pystac import (
15
14
  TemporalExtent,
16
15
  )
17
16
  from structlog import get_logger
18
- from yaml.loader import SafeLoader
19
17
 
20
18
  from eodash_catalog.utils import (
21
19
  generateDatetimesFromInterval,
22
20
  get_full_url,
23
21
  parse_datestring_to_tz_aware_datetime,
22
+ read_config_file,
24
23
  )
25
24
 
26
25
  LOGGER = get_logger(__name__)
@@ -399,20 +398,18 @@ def add_base_overlay_info(
399
398
  collection.add_link(create_web_map_link(layer, role="baselayer"))
400
399
  # alternatively use default base layers defined
401
400
  elif "default_base_layers" in catalog_config:
402
- with open(f'{catalog_config["default_base_layers"]}.yaml') as f:
403
- base_layers = yaml.load(f, Loader=SafeLoader)
404
- for layer in base_layers:
405
- collection.add_link(create_web_map_link(layer, role="baselayer"))
401
+ base_layers = read_config_file(catalog_config["default_base_layers"])
402
+ for layer in base_layers:
403
+ collection.add_link(create_web_map_link(layer, role="baselayer"))
406
404
  # add custom overlays just for this indicator
407
405
  if "OverlayLayers" in collection_config:
408
406
  for layer in collection_config["OverlayLayers"]:
409
407
  collection.add_link(create_web_map_link(layer, role="overlay"))
410
408
  # check if default overlay layers defined
411
409
  elif "default_overlay_layers" in catalog_config:
412
- with open("{}.yaml".format(catalog_config["default_overlay_layers"])) as f:
413
- overlay_layers = yaml.load(f, Loader=SafeLoader)
414
- for layer in overlay_layers:
415
- collection.add_link(create_web_map_link(layer, role="overlay"))
410
+ overlay_layers = read_config_file(catalog_config["default_overlay_layers"])
411
+ for layer in overlay_layers:
412
+ collection.add_link(create_web_map_link(layer, role="overlay"))
416
413
 
417
414
 
418
415
  def add_extra_fields(stac_object: Collection | Link, collection_config: dict) -> None:
eodash_catalog/utils.py CHANGED
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import os
2
3
  import re
3
4
  import threading
@@ -10,11 +11,13 @@ from decimal import Decimal
10
11
  from functools import reduce, wraps
11
12
  from typing import Any
12
13
 
14
+ import stac_geoparquet as stacgp
15
+ import yaml
13
16
  from dateutil import parser
14
17
  from owslib.wcs import WebCoverageService
15
18
  from owslib.wms import WebMapService
16
19
  from owslib.wmts import WebMapTileService
17
- from pystac import Catalog, Collection, Item, RelType
20
+ from pystac import Asset, Catalog, Collection, Item, Link, RelType
18
21
  from pytz import timezone as pytztimezone
19
22
  from six import string_types
20
23
  from structlog import get_logger
@@ -233,14 +236,47 @@ class RaisingThread(threading.Thread):
233
236
  raise self._exc
234
237
 
235
238
 
236
- def recursive_save(stac_object: Catalog, no_items: bool = False) -> None:
237
- stac_object.save_object()
239
+ def recursive_save(stac_object: Catalog, no_items: bool = False, geo_parquet: bool = False) -> None:
238
240
  for child in stac_object.get_children():
239
- recursive_save(child, no_items)
241
+ recursive_save(child, no_items, geo_parquet)
240
242
  if not no_items:
241
- # try to save items if available
242
- for item in stac_object.get_items():
243
- item.save_object()
243
+ if geo_parquet:
244
+ create_geoparquet_items(stac_object)
245
+ else:
246
+ for item in stac_object.get_items():
247
+ item.save_object()
248
+ stac_object.save_object()
249
+
250
+
251
+ def create_geoparquet_items(stacObject: Catalog):
252
+ if iter_len_at_least(stacObject.get_items(), 1):
253
+ stac_dir_arr = stacObject.self_href.split("/")
254
+ stac_dir_arr.pop()
255
+ stac_dir_path = "/".join(stac_dir_arr)
256
+ items_stacgp_path = f"{stac_dir_path}/items.parquet"
257
+ to_stac_geoparquet(stacObject, items_stacgp_path)
258
+ gp_link = Link(
259
+ rel="items",
260
+ target=items_stacgp_path,
261
+ media_type="application/vnd.apache.parquet",
262
+ title="GeoParquet Items",
263
+ )
264
+ stacObject.clear_links(rel="item")
265
+ stacObject.add_links([gp_link])
266
+
267
+
268
+ def to_stac_geoparquet(stacObject: Catalog, path: str):
269
+ items = []
270
+ for item in stacObject.get_items():
271
+ if not item.geometry:
272
+ item.geometry = create_geojson_point(0, 0)["geometry"]
273
+ if not item.assets:
274
+ item.assets = {"dummy_asset": Asset(href="")}
275
+ items.append(item.to_dict())
276
+ record_batch_reader = stacgp.arrow.parse_stac_items_to_arrow(items)
277
+ table = record_batch_reader.read_all()
278
+ os.makedirs(os.path.dirname(path), exist_ok=True)
279
+ stacgp.arrow.to_parquet(table, path)
244
280
 
245
281
 
246
282
  def iter_len_at_least(i, n: int) -> int:
@@ -289,6 +325,7 @@ class Options:
289
325
  vd: bool
290
326
  ni: bool
291
327
  tn: bool
328
+ gp: bool
292
329
  collections: list[str]
293
330
 
294
331
 
@@ -392,3 +429,32 @@ def get_full_url(url: str, catalog_config) -> str:
392
429
  return url
393
430
  else:
394
431
  return f'{catalog_config["assets_endpoint"]}{url}'
432
+
433
+
434
+ def read_config_file(path: str) -> dict:
435
+ # If the given path exists directly, use it
436
+ if os.path.exists(path):
437
+ return _load_file(path)
438
+
439
+ # Otherwise, try appending supported suffixes
440
+ for suffix in [".json", ".yaml", ".yml", ".JSON", ".YAML", ".YML"]:
441
+ candidate = path + suffix
442
+ if os.path.exists(candidate):
443
+ return _load_file(candidate)
444
+
445
+ raise FileNotFoundError(
446
+ f"No file found for '{path}' with or without supported suffixes (.json/.yaml/.yml)"
447
+ )
448
+
449
+
450
+ def _load_file(filepath):
451
+ with open(filepath) as file:
452
+ content = file.read()
453
+ try:
454
+ return json.loads(content)
455
+ except json.JSONDecodeError:
456
+ pass
457
+ try:
458
+ return yaml.safe_load(content)
459
+ except yaml.YAMLError as err:
460
+ raise ValueError(f"Failed to parse '{filepath}' as JSON or YAML: {err}") from err
{eodash_catalog-0.1.7.dist-info → eodash_catalog-0.1.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eodash_catalog
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: This package is intended to help create a compatible STAC catalog for the eodash dashboard client. It supports configuration of multiple endpoint types for information extraction.
5
5
  Project-URL: Documentation, https://github.com/eodash/eodash_catalog#readme
6
6
  Project-URL: Issues, https://github.com/eodash/eodash_catalog/issues
@@ -33,6 +33,7 @@ Requires-Dist: requests-oauthlib<1.3.2
33
33
  Requires-Dist: requests<3
34
34
  Requires-Dist: setuptools<71
35
35
  Requires-Dist: spdx-lookup<=0.3.3
36
+ Requires-Dist: stac-geoparquet<=0.6.0
36
37
  Requires-Dist: structlog<22.0
37
38
  Requires-Dist: swiftspec==0.0.2
38
39
  Provides-Extra: dev
eodash_catalog-0.1.9.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
1
+ eodash_catalog/__about__.py,sha256=dai3IEzZp1p1kyDUiY5z5uR4Wscdqp-fRUkdtOJesKM,137
2
+ eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
3
+ eodash_catalog/duration.py,sha256=B6XOZfvNU7SuqpxuVtT1kNKODoOQJXDI6mocvA_U1ik,10816
4
+ eodash_catalog/endpoints.py,sha256=kVD7pKo26wyuBaI457Z1IsDGLPd0IKKiYBE8d8h4RR4,41582
5
+ eodash_catalog/generate_indicators.py,sha256=K3Jj7bsS6fd8y82fmHPYQ1j_4N72RGpeAIx6ZHuQ1AY,19487
6
+ eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
7
+ eodash_catalog/stac_handling.py,sha256=VWivB_bn9Qwh8GrDSpicCXzItroIHz4-w5SZTkueWLg,21079
8
+ eodash_catalog/thumbnails.py,sha256=qZDcpQe80ki6lEMKYdZtSnnHH0PUpcoXTvU9bYdPlzU,2260
9
+ eodash_catalog/utils.py,sha256=zZ4PT1h1lWCMvEIfg5Xlw4re86pwR40BX8jEDweiTJ0,16115
10
+ eodash_catalog-0.1.9.dist-info/METADATA,sha256=q0NXSet4rVJG38MPU-yG0icHDaCxXl-douED8LTGkyE,3233
11
+ eodash_catalog-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ eodash_catalog-0.1.9.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
13
+ eodash_catalog-0.1.9.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
14
+ eodash_catalog-0.1.9.dist-info/RECORD,,
eodash_catalog-0.1.7.dist-info/RECORD REMOVED
@@ -1,14 +0,0 @@
1
- eodash_catalog/__about__.py,sha256=xEuIdo-9kLP5EcsdX1gA9wJMUpIgojtxgX3B62i_2_k,137
2
- eodash_catalog/__init__.py,sha256=_W_9emPYf6FUqc0P8L2SmADx6hGSd7PlQV3yRmCk5uM,115
3
- eodash_catalog/duration.py,sha256=B6XOZfvNU7SuqpxuVtT1kNKODoOQJXDI6mocvA_U1ik,10816
4
- eodash_catalog/endpoints.py,sha256=YHkIGhaLL0-RF9_arPNSMeClsNSTHzP0uNZ9eJmGnK4,41545
5
- eodash_catalog/generate_indicators.py,sha256=jqPns3KOZA8GEcxGYD6NcYa5iGtjKcrKkW7LLv13PRQ,20634
6
- eodash_catalog/sh_endpoint.py,sha256=XjZsZJ5jfJZLQenSTqUhiUZ5YAu9M9nv2KL1Qv3Be-I,1219
7
- eodash_catalog/stac_handling.py,sha256=8_W45SIXV4jAUa9J-f2KZ2eor6MNfPfy0n1d-932eAE,21234
8
- eodash_catalog/thumbnails.py,sha256=qZDcpQe80ki6lEMKYdZtSnnHH0PUpcoXTvU9bYdPlzU,2260
9
- eodash_catalog/utils.py,sha256=QhZeMXRC1uvE3VzC3T_nMi9hn_3RR05s5rkzs8tCeQc,13866
10
- eodash_catalog-0.1.7.dist-info/METADATA,sha256=D1MNzWEVl32ygy-XAAaLSbUHOLjtmaYRV71GL1s8xgI,3195
11
- eodash_catalog-0.1.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- eodash_catalog-0.1.7.dist-info/entry_points.txt,sha256=kuUQrDG1PtYd8kPjf5XM6H_NtQd9Ozwl0jjiGtAvZSM,87
13
- eodash_catalog-0.1.7.dist-info/licenses/LICENSE.txt,sha256=oJCW5zQxnFD-J0hGz6Zh5Lkpdk1oAndmWhseTmV224E,1107
14
- eodash_catalog-0.1.7.dist-info/RECORD,,