eodash_catalog 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eodash_catalog might be problematic. Click here for more details.

Files changed (38) hide show
  1. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/.bumpversion.cfg +1 -1
  2. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/.gitignore +1 -0
  3. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/PKG-INFO +2 -1
  4. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/pyproject.toml +1 -0
  5. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/requirements.txt +2 -2
  6. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/ruff.toml +1 -1
  7. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/__about__.py +1 -1
  8. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/endpoints.py +9 -6
  9. eodash_catalog-0.1.10/src/eodash_catalog/generate_indicators.py +487 -0
  10. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/stac_handling.py +59 -15
  11. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/utils.py +73 -7
  12. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/test_generate.py +80 -1
  13. eodash_catalog-0.1.10/tests/test_geoparquet.py +81 -0
  14. eodash_catalog-0.1.10/tests/testing-catalogs/testing-json.json +13 -0
  15. eodash_catalog-0.1.10/tests/testing-collections/test_locations_processing.json +69 -0
  16. eodash_catalog-0.1.10/tests/testing-collections/test_tif_demo_1_json.json +34 -0
  17. eodash_catalog-0.1.8/src/eodash_catalog/generate_indicators.py +0 -491
  18. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/.github/workflows/ci.yml +0 -0
  19. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/.github/workflows/python-publish.yml +0 -0
  20. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/.vscode/extensions.json +0 -0
  21. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/.vscode/settings.json +0 -0
  22. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/LICENSE.txt +0 -0
  23. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/README.md +0 -0
  24. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/__init__.py +0 -0
  25. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/duration.py +0 -0
  26. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/sh_endpoint.py +0 -0
  27. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/src/eodash_catalog/thumbnails.py +0 -0
  28. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/__init__.py +0 -0
  29. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/test-data/regional_forecast.json +0 -0
  30. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-catalogs/testing.yaml +0 -0
  31. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-collections/test_CROPOMAT1.yaml +0 -0
  32. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-collections/test_see_solar_energy.yaml +0 -0
  33. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-collections/test_tif_demo_1.yaml +0 -0
  34. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-collections/test_tif_demo_2.yaml +0 -0
  35. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-collections/test_wms_no_time.yaml +0 -0
  36. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-indicators/test_indicator.yaml +0 -0
  37. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-layers/baselayers.yaml +0 -0
  38. {eodash_catalog-0.1.8 → eodash_catalog-0.1.10}/tests/testing-layers/overlays.yaml +0 -0
@@ -1,5 +1,5 @@
1
1
  [bumpversion]
2
- current_version = 0.1.8
2
+ current_version = 0.1.10
3
3
  commit = True
4
4
  tag = True
5
5
  parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)\.(?P<build>\d+))?
@@ -165,3 +165,4 @@ collections/
165
165
  layers/
166
166
  .venv
167
167
  .pytest
168
+ .DS_Store
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eodash_catalog
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: This package is intended to help create a compatible STAC catalog for the eodash dashboard client. It supports configuration of multiple endpoint types for information extraction.
5
5
  Project-URL: Documentation, https://github.com/eodash/eodash_catalog#readme
6
6
  Project-URL: Issues, https://github.com/eodash/eodash_catalog/issues
@@ -33,6 +33,7 @@ Requires-Dist: requests-oauthlib<1.3.2
33
33
  Requires-Dist: requests<3
34
34
  Requires-Dist: setuptools<71
35
35
  Requires-Dist: spdx-lookup<=0.3.3
36
+ Requires-Dist: stac-geoparquet<=0.6.0
36
37
  Requires-Dist: structlog<22.0
37
38
  Requires-Dist: swiftspec==0.0.2
38
39
  Provides-Extra: dev
@@ -40,6 +40,7 @@ dependencies = [
40
40
  "OWSLib",
41
41
  "spdx-lookup<=0.3.3",
42
42
  "pystac[validation]<2",
43
+ "stac_geoparquet<=0.6.0"
43
44
  ]
44
45
  [project.scripts]
45
46
  eodash_catalog = "eodash_catalog.generate_indicators:process_catalogs"
@@ -3,7 +3,7 @@ setuptools<71
3
3
  oauthlib<3.3
4
4
  requests-oauthlib<1.3.2
5
5
  python-dotenv<1.1.0
6
- pystac-client==0.8.3
6
+ pystac-client==0.8.6
7
7
  pyyaml<7
8
8
  redis<4
9
9
  pygeofilter[backend-native]==0.2.0
@@ -17,7 +17,7 @@ structlog<22.0
17
17
  OWSLib==0.31
18
18
  spdx-lookup<=0.3.3
19
19
  pystac[validation]==1.10.1
20
-
20
+ stac_geoparquet==0.6.0
21
21
  # dev tooling
22
22
  pytest==8.1.1
23
23
  pytest-watch==4.2.0
@@ -5,4 +5,4 @@ target-version = "py312"
5
5
 
6
6
  [lint]
7
7
  select = ["E", "F", "UP", "B", "SIM", "I", "W", "C90", "ASYNC", "A", "C4", "PERF", "RUF"]
8
- ignore = ["B019", "C901", "UP017"]
8
+ ignore = ["B019", "C901", "UP017", "C419"]
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2024-present Daniel Santillan <daniel.santillan@eox.at>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.1.8"
4
+ __version__ = "0.1.10"
@@ -17,6 +17,7 @@ from eodash_catalog.sh_endpoint import get_SH_token
17
17
  from eodash_catalog.stac_handling import (
18
18
  add_collection_information,
19
19
  add_example_info,
20
+ add_process_info_child_collection,
20
21
  add_projection_info,
21
22
  get_collection_datetimes_from_config,
22
23
  get_or_create_collection,
@@ -174,7 +175,7 @@ def handle_STAC_based_endpoint(
174
175
  collection.description = location["Name"]
175
176
  # TODO: should we remove all assets from sub collections?
176
177
  link = root_collection.add_child(collection)
177
- latlng = f'{location["Point"][1]},{location["Point"][0]}'
178
+ latlng = f'{location["Point"][1]},{location["Point"][0]}'.strip()
178
179
  # Add extra properties we need
179
180
  link.extra_fields["id"] = location["Identifier"]
180
181
  link.extra_fields["latlng"] = latlng
@@ -182,6 +183,7 @@ def handle_STAC_based_endpoint(
182
183
  add_example_info(collection, collection_config, endpoint_config, catalog_config)
183
184
  # eodash v4 compatibility
184
185
  add_visualization_info(collection, collection_config, endpoint_config)
186
+ add_process_info_child_collection(collection, catalog_config, collection_config)
185
187
  if "OverwriteBBox" in location:
186
188
  collection.extent.spatial = SpatialExtent(
187
189
  [
@@ -208,7 +210,7 @@ def handle_STAC_based_endpoint(
208
210
  )
209
211
  # eodash v4 compatibility
210
212
  add_visualization_info(root_collection, collection_config, endpoint_config)
211
- add_collection_information(catalog_config, root_collection, collection_config)
213
+ add_collection_information(catalog_config, root_collection, collection_config, True)
212
214
  add_example_info(root_collection, collection_config, endpoint_config, catalog_config)
213
215
  return root_collection
214
216
 
@@ -406,7 +408,7 @@ def handle_SH_WMS_endpoint(
406
408
 
407
409
  link = root_collection.add_child(collection)
408
410
  # bubble up information we want to the link
409
- latlng = "{},{}".format(location["Point"][1], location["Point"][0])
411
+ latlng = "{},{}".format(location["Point"][1], location["Point"][0]).strip()
410
412
  link.extra_fields["id"] = location["Identifier"]
411
413
  link.extra_fields["latlng"] = latlng
412
414
  link.extra_fields["country"] = location["Country"]
@@ -416,6 +418,7 @@ def handle_SH_WMS_endpoint(
416
418
  else:
417
419
  LOGGER.warn(f"NO datetimes configured for collection: {collection_config['Name']}!")
418
420
  add_visualization_info(collection, collection_config, endpoint_config)
421
+ add_process_info_child_collection(collection, catalog_config, collection_config)
419
422
 
420
423
  root_collection.update_extent_from_items()
421
424
  # Add bbox extents from children
@@ -443,7 +446,7 @@ def handle_SH_WMS_endpoint(
443
446
  item_link = root_collection.add_item(item)
444
447
  item_link.extra_fields["datetime"] = format_datetime_to_isostring_zulu(dt)
445
448
  # eodash v4 compatibility
446
- add_collection_information(catalog_config, root_collection, collection_config)
449
+ add_collection_information(catalog_config, root_collection, collection_config, True)
447
450
  add_visualization_info(root_collection, collection_config, endpoint_config)
448
451
  return root_collection
449
452
 
@@ -528,7 +531,7 @@ def handle_GeoDB_endpoint(
528
531
  id=IdValue,
529
532
  bbox=bbox,
530
533
  properties={},
531
- geometry=create_geojson_point(lon, lat),
534
+ geometry=create_geojson_point(lon, lat)["geometry"],
532
535
  datetime=None,
533
536
  start_datetime=min_date,
534
537
  end_datetime=max_date,
@@ -990,7 +993,7 @@ def handle_raw_source(
990
993
  id=format_datetime_to_isostring_zulu(dt),
991
994
  bbox=bbox,
992
995
  properties={},
993
- geometry=create_geojson_from_bbox(bbox),
996
+ geometry=create_geojson_from_bbox(bbox)["features"][0]["geometry"],
994
997
  datetime=dt,
995
998
  assets=assets,
996
999
  extra_fields={},
@@ -0,0 +1,487 @@
1
+ #!/usr/bin/python
2
+ """
3
+ Indicator generator to harvest information from endpoints and generate catalog
4
+
5
+ """
6
+
7
+ import os
8
+ import time
9
+ from typing import Any
10
+
11
+ import click
12
+ from dotenv import load_dotenv
13
+ from pystac import Catalog, CatalogType, Collection, Link, Summaries
14
+ from pystac.layout import TemplateLayoutStrategy
15
+ from pystac.validation import validate_all
16
+ from structlog import get_logger
17
+
18
+ from eodash_catalog.endpoints import (
19
+ handle_collection_only,
20
+ handle_custom_endpoint,
21
+ handle_GeoDB_endpoint,
22
+ handle_rasdaman_endpoint,
23
+ handle_raw_source,
24
+ handle_SH_endpoint,
25
+ handle_SH_WMS_endpoint,
26
+ handle_VEDA_endpoint,
27
+ handle_WMS_endpoint,
28
+ handle_xcube_endpoint,
29
+ )
30
+ from eodash_catalog.stac_handling import (
31
+ add_base_overlay_info,
32
+ add_collection_information,
33
+ add_extra_fields,
34
+ add_process_info,
35
+ add_projection_info,
36
+ get_or_create_collection,
37
+ )
38
+ from eodash_catalog.utils import (
39
+ Options,
40
+ RaisingThread,
41
+ add_single_item_if_collection_empty,
42
+ iter_len_at_least,
43
+ read_config_file,
44
+ recursive_save,
45
+ retry,
46
+ )
47
+
48
+ # make sure we are loading the env local definition
49
+ load_dotenv()
50
+ LOGGER = get_logger(__name__)
51
+
52
+
53
def process_catalog_file(file_path: str, options: Options):
    """Build the STAC catalog described by one catalog configuration file and save it.

    The configuration is read with ``read_config_file``. Every selected entry of its
    ``collections`` list is processed with ``process_indicator_file``, looking first in
    ``options.collectionspath`` and, when that file does not exist, in
    ``options.indicatorspath``. The resulting catalog is normalized, saved below
    ``options.outputpath`` and, if ``options.vd`` is set, validated.

    Args:
        file_path: path of the catalog configuration file.
        options: run options; ``collections`` restricts which collections are built.
            An empty selection, or the single entry ``"all"``, builds every collection
            listed in the catalog configuration.

    Returns:
        None. Returns early, without writing anything, when no collection applies.
    """
    LOGGER.info(f"Processing catalog: {file_path}")
    catalog_config: dict = read_config_file(file_path)
    requested = options.collections
    # A lone "all" has to be recognised before filtering by name; otherwise it is
    # treated as a collection called "all" and nothing gets built.
    build_everything = len(requested) == 0 or (len(requested) == 1 and requested[0] == "all")
    if build_everything:
        # create full catalog
        process_collections = catalog_config["collections"]
    else:
        # create only catalogs containing the passed collections
        process_collections = [c for c in catalog_config["collections"] if c in requested]
    if len(process_collections) == 0:
        LOGGER.info("No applicable collections found for catalog, skipping creation")
        return
    catalog = Catalog(
        id=catalog_config["id"],
        description=catalog_config["description"],
        title=catalog_config["title"],
        catalog_type=CatalogType.RELATIVE_PUBLISHED,
    )
    for collection in process_collections:
        # use dedicated names so `file_path` keeps pointing at the catalog file
        collection_path = f"{options.collectionspath}/{collection}"
        try:
            # if collection file exists process it as indicator
            # collection will be added as single collection to indicator
            process_indicator_file(catalog_config, collection_path, catalog, options)
        except FileNotFoundError:
            # if not exists try to see if indicator definition available
            indicator_path = f"{options.indicatorspath}/{collection}"
            try:
                process_indicator_file(
                    catalog_config,
                    indicator_path,
                    catalog,
                    options,
                )
            except FileNotFoundError:
                LOGGER.info(f"Warning: neither collection nor indicator found for {collection}")
    if "MapProjection" in catalog_config:
        catalog.extra_fields["eodash:mapProjection"] = catalog_config["MapProjection"]

    strategy = TemplateLayoutStrategy(item_template="${collection}/${year}")
    # expecting that the catalog will be hosted online, self url should correspond to that
    # default to a local folder + catalog id in case not set
    local_href = "{}/{}".format(options.outputpath, catalog_config["id"])

    LOGGER.info("Started creation of collection files")
    start = time.time()
    if options.ni or options.gp:
        catalog.normalize_hrefs(local_href, strategy=strategy)
        recursive_save(catalog, options.ni, options.gp)
    else:
        # For full catalog save with items this still seems to be faster
        catalog_self_href = catalog_config.get("endpoint", local_href)
        catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
        catalog.save(dest_href=local_href)
    end = time.time()
    LOGGER.info(f"Catalog {catalog_config['id']}: Time consumed in saving: {end - start}")

    if options.vd:
        # try to validate catalog if flag was set
        LOGGER.info(f"Running validation of catalog {file_path}")
        try:
            # a missing "endpoint" entry raises here and is reported like any
            # other validation problem instead of aborting the run
            validate_all(catalog.to_dict(), href=catalog_config["endpoint"])
        except Exception as e:
            LOGGER.info(f"Issue validation collection: {e}")
121
+
122
+
123
def extract_indicator_info(parent_collection: Collection):
    """Collect selected metadata of the child collections into the parent's summaries.

    For each tracked field, the values found in a child's ``extra_fields`` (single
    value or list) and in the child's summary lists are merged without duplicates.
    Fields for which nothing was found are left out. The parent's ``summaries`` are
    replaced by the merged result.
    """
    tracked_fields = (
        "subcode",
        "themes",
        "keywords",
        "satellite",
        "sensor",
        "cities",
        "countries",
        "thumbnail",
    )
    gathered: dict[str, Any] = {name: set() for name in tracked_fields}

    for child in parent_collection.get_collections():
        for name in tracked_fields:
            bucket = gathered[name]
            if name in child.extra_fields:
                value = child.extra_fields[name]
                if isinstance(value, list):
                    bucket.update(value)
                else:
                    bucket.add(value)
            # summary lists of the child contribute as well
            summary_lists = child.summaries.lists
            if summary_lists and summary_lists.get(name):
                bucket.update(child.summaries.lists[name])

    # back to plain lists, dropping fields that stayed empty
    parent_collection.summaries = Summaries(
        {name: list(values) for name, values in gathered.items() if len(values) > 0}
    )
159
+
160
+
161
def process_indicator_file(
    catalog_config: dict, file_path: str, catalog: Catalog, options: Options
):
    """Create the indicator collection described by ``file_path`` and add it to ``catalog``.

    The configuration either lists member collections under ``Collections`` (each one
    is loaded from ``options.collectionspath``) or is itself a collection definition,
    in which case the same file is processed as the single member. Afterwards the
    indicator receives collection information, extents, summaries gathered from its
    members, process information and base/overlay layer information.
    """
    LOGGER.info(f"Processing indicator: {file_path}")
    indicator_config = read_config_file(file_path)
    parent_indicator = get_or_create_collection(
        catalog, indicator_config["Name"], indicator_config, catalog_config, {}
    )
    if "Collections" not in indicator_config:
        # we assume that collection files can also be loaded directly
        process_collection_file(catalog_config, file_path, parent_indicator, options)
    else:
        for member in indicator_config["Collections"]:
            member_path = f"{options.collectionspath}/{member}"
            # members named under "Disable" are still processed but flagged
            is_disabled = "Disable" in indicator_config and member in indicator_config["Disable"]
            process_collection_file(
                catalog_config,
                member_path,
                parent_indicator,
                options,
                is_disabled,
            )
    add_collection_information(catalog_config, parent_indicator, indicator_config, True)
    has_items = iter_len_at_least(parent_indicator.get_items(recursive=True), 1)
    if has_items:
        parent_indicator.update_extent_from_items()
    # Add bbox extents from children
    for child in parent_indicator.get_children():
        if not isinstance(child, Collection):  # typing reason
            continue
        first_bbox = child.extent.spatial.bboxes[0]
        parent_indicator.extent.spatial.bboxes.append(first_bbox)
    # extract collection information and add it to summary indicator level
    extract_indicator_info(parent_indicator)
    add_process_info(parent_indicator, catalog_config, indicator_config)
    # add baselayer and overview information to indicator collection
    add_base_overlay_info(parent_indicator, catalog_config, indicator_config)
    add_to_catalog(parent_indicator, catalog, {}, indicator_config)
194
+
195
+
196
def _collection_from_resource(
    catalog_config: dict,
    endpoint_config: dict,
    collection_config: dict,
    catalog: Catalog | Collection,
    options: Options,
):
    """Create the collection for one ``Resources`` entry with the handler matching its ``Name``.

    Raises:
        ValueError: if no handler exists for the resource name.
    """
    resource_name = endpoint_config["Name"]
    shared_args = (catalog_config, endpoint_config, collection_config, catalog)

    # handlers that additionally receive the run options
    needs_options = {
        "Sentinel Hub": handle_SH_endpoint,
        "VEDA": handle_VEDA_endpoint,
    }
    if resource_name in needs_options:
        return needs_options[resource_name](*shared_args, options)

    # WMTS sources go through the WMS handler; JAXA_WMTS_PALSAR is a somewhat
    # one off creation of individual WMTS layers as individual items
    if resource_name in ("marinedatastore", "JAXA_WMTS_PALSAR"):
        return handle_WMS_endpoint(*shared_args, wmts=True)

    plain_handlers = {
        "Sentinel Hub WMS": handle_SH_WMS_endpoint,
        "GeoDB": handle_GeoDB_endpoint,
        "xcube": handle_xcube_endpoint,
        "rasdaman": handle_rasdaman_endpoint,
        "WMS": handle_WMS_endpoint,
        "Collection-only": handle_collection_only,
        "Custom-Endpoint": handle_custom_endpoint,
        "COG source": handle_raw_source,
        "GeoJSON source": handle_raw_source,
        "FlatGeobuf source": handle_raw_source,
    }
    handler = plain_handlers.get(resource_name)
    if handler is None:
        raise ValueError("Type of Resource is not supported")
    return handler(*shared_args)


@retry((Exception), tries=3, delay=5, backoff=2, logger=LOGGER)
def process_collection_file(
    catalog_config: dict,
    file_path: str,
    catalog: Catalog | Collection,
    options: Options,
    disable=False,
):
    """Process one collection configuration file and add the result to ``catalog``.

    A configuration with ``Resources`` produces one collection per resource entry. A
    configuration with ``Subcollections`` produces a parent collection whose members
    are loaded from ``options.collectionspath``; location and country names found on
    the way are stored as the parent's ``cities`` / ``countries`` summaries.

    Args:
        catalog_config: configuration of the catalog being built.
        file_path: path of the collection configuration file.
        catalog: catalog or parent collection receiving the generated collection(s).
        options: run options.
        disable: forwarded to ``add_to_catalog`` for collections created from resources.

    Raises:
        Exception: any error raised while handling a resource is logged and re-raised.
    """
    LOGGER.info(f"Processing collection: {file_path}")
    collection_config = read_config_file(file_path)
    if "Resources" in collection_config:
        for endpoint_config in collection_config["Resources"]:
            try:
                collection = _collection_from_resource(
                    catalog_config, endpoint_config, collection_config, catalog, options
                )
                if collection:
                    add_single_item_if_collection_empty(collection)
                    add_projection_info(endpoint_config, collection)
                    add_to_catalog(collection, catalog, endpoint_config, collection_config, disable)
                else:
                    raise Exception(f"No collection was generated for resource {endpoint_config}")
            except Exception as e:
                # exceptions created without arguments have an empty `args`;
                # indexing it would hide the real error behind an IndexError
                detail = e.args[0] if e.args else e
                LOGGER.warn(f"""Exception: {detail} with config: {endpoint_config}""")
                raise

    elif "Subcollections" in collection_config:
        # if no endpoint is specified we check for definition of subcollections
        parent_collection = get_or_create_collection(
            catalog, collection_config["Name"], collection_config, catalog_config, {}
        )

        locations = []
        countries = []
        for sub_coll_def in collection_config["Subcollections"]:
            sub_coll_path = "{}/{}".format(options.collectionspath, sub_coll_def["Collection"])
            # Subcollection has only data on one location which
            # is defined for the entire collection
            if "Name" in sub_coll_def and "Point" in sub_coll_def:
                locations.append(sub_coll_def["Name"])
                if isinstance(sub_coll_def["Country"], list):
                    countries.extend(sub_coll_def["Country"])
                else:
                    countries.append(sub_coll_def["Country"])
                process_collection_file(
                    catalog_config,
                    sub_coll_path,
                    parent_collection,
                    options,
                )
                # find link in parent collection to update metadata
                for link in parent_collection.links:
                    if (
                        link.rel == "child"
                        and "id" in link.extra_fields
                        and link.extra_fields["id"] == sub_coll_def["Identifier"]
                    ):
                        latlng = "{},{}".format(
                            sub_coll_def["Point"][1],
                            sub_coll_def["Point"][0],
                        )
                        link.extra_fields["id"] = sub_coll_def["Identifier"]
                        link.extra_fields["latlng"] = latlng
                        link.extra_fields["name"] = sub_coll_def["Name"]
                # Update title of collection to use location name
                sub_collection = parent_collection.get_child(id=sub_coll_def["Identifier"])
                if sub_collection:
                    sub_collection.title = sub_coll_def["Name"]
            # The subcollection has multiple locations which need to be extracted
            # and elevated to parent collection level
            else:
                # create temp catalog to save collection
                tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
                process_collection_file(
                    catalog_config,
                    sub_coll_path,
                    tmp_catalog,
                    options,
                )
                links = tmp_catalog.get_child(sub_coll_def["Identifier"]).get_links()  # type: ignore
                for link in links:
                    # extract summary information
                    if "city" in link.extra_fields:
                        locations.append(link.extra_fields["city"])
                    if "country" in link.extra_fields:
                        if isinstance(link.extra_fields["country"], list):
                            countries.extend(link.extra_fields["country"])
                        else:
                            countries.append(link.extra_fields["country"])

                parent_collection.add_links(links)

        add_collection_information(catalog_config, parent_collection, collection_config)
        add_process_info(parent_collection, catalog_config, collection_config)
        parent_collection.update_extent_from_items()
        # Add bbox extents from children
        for c_child in parent_collection.get_children():
            if isinstance(c_child, Collection):
                parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
        # Fill summaries for locations
        parent_collection.summaries = Summaries(
            {
                "cities": list(set(locations)),
                "countries": list(set(countries)),
            }
        )
        add_to_catalog(parent_collection, catalog, {}, collection_config)
358
+
359
+
360
def add_to_catalog(
    collection: Collection, catalog: Catalog, endpoint: dict, collection_config: dict, disable=False
):
    """Attach ``collection`` to ``catalog`` and copy selected metadata onto the child link.

    Nothing is added (and ``None`` is returned) when the catalog already holds a
    collection with the same id. Otherwise the new link receives the endpoint type,
    subtitle, title, code, id, themes, an optional ``disable`` role and the
    collection's summary lists, followed by whatever ``add_extra_fields`` contributes.

    Returns:
        The link created for the collection, or ``None`` if it was already present.
    """
    # check if already in catalog, if it is do not re-add it
    # TODO: probably we should add to the catalog only when creating
    if any(existing.id == collection.id for existing in catalog.get_collections()):
        return

    link: Link = catalog.add_child(collection)
    # bubble fields we want to have up to collection link and add them to collection
    if endpoint:
        if "Type" in endpoint:
            endpoint_type = "{}_{}".format(endpoint["Name"], endpoint["Type"])
        else:
            endpoint_type = endpoint["Name"]
        collection.extra_fields["endpointtype"] = endpoint_type
        link.extra_fields["endpointtype"] = endpoint_type
    if "Subtitle" in collection_config:
        link.extra_fields["subtitle"] = collection_config["Subtitle"]
    link.extra_fields["title"] = collection.title
    link.extra_fields["code"] = collection_config["EodashIdentifier"]
    link.extra_fields["id"] = collection_config["Name"]
    if "Themes" in collection_config:
        link.extra_fields["themes"] = collection_config["Themes"]
    if disable:
        link.extra_fields["roles"] = ["disable"]
    # Check for summaries and bubble up info
    summary_lists = collection.summaries.lists
    if summary_lists:
        for summary_name, summary_values in summary_lists.items():
            link.extra_fields[summary_name] = summary_values

    add_extra_fields(link, collection_config)
    return link
399
+
400
+
401
@click.command()
@click.option(
    "--catalog",
    "-ctl",
    help="id of catalog configuration file to be used",
    default=None,
)
@click.option(
    "--catalogspath",
    "-ctp",
    help="path to catalog configuration files",
    default="catalogs",
)
@click.option(
    "--collectionspath",
    "-clp",
    help="path to collection configuration files",
    default="collections",
)
@click.option(
    "--indicatorspath",
    "-inp",
    help="path to indicator configuration files",
    default="indicators",
)
@click.option(
    "--outputpath",
    "-o",
    help="path where the generated catalogs will be saved",
    default="build",
)
@click.option(
    "-vd",
    is_flag=True,
    help="validation flag, if set, validation will be run on generated catalogs",
)
@click.option("-ni", is_flag=True, help="no items flag, if set, items will not be saved")
@click.option(
    "-tn",
    is_flag=True,
    help="generate additionally thumbnail image for supported collections",
)
@click.option(
    "-gp",
    is_flag=True,
    help="generates the items in .parquet format",
)
@click.argument(
    "collections",
    nargs=-1,
)
def process_catalogs(
    catalog,
    catalogspath,
    collectionspath,
    indicatorspath,
    outputpath,
    vd,
    ni,
    tn,
    gp,
    collections,
):
    """STAC generator and harvester:
    This library goes over configured endpoints extracting as much information
    as possible and generating a STAC catalog with the information"""
    # NOTE: the docstring above is the help text of the command, keep it as is
    options = Options(
        catalogspath=catalogspath,
        collectionspath=collectionspath,
        indicatorspath=indicatorspath,
        outputpath=outputpath,
        vd=vd,
        ni=ni,
        tn=tn,
        collections=collections,
        gp=gp,
    )
    # one worker thread per catalog configuration file found in `catalogspath`
    workers = []
    for entry in os.listdir(catalogspath):
        config_path = f"{catalogspath}/{entry}"
        if not os.path.isfile(config_path):
            continue
        # with --catalog given, only the file whose stem matches is processed
        if catalog is not None and os.path.splitext(entry)[0] != catalog:
            continue
        worker = RaisingThread(target=process_catalog_file, args=(config_path, options))
        workers.append(worker)
        worker.start()
    # wait for all catalogs to finish
    for worker in workers:
        worker.join()