eodash_catalog-0.0.10-py3-none-any.whl → eodash_catalog-0.0.12-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eodash_catalog might be problematic. Click here for more details.

@@ -6,25 +6,20 @@ Indicator generator to harvest information from endpoints and generate catalog
6
6
 
7
7
  import os
8
8
  import time
9
- from dataclasses import dataclass
9
+ from typing import Any
10
10
 
11
11
  import click
12
12
  import yaml
13
13
  from dotenv import load_dotenv
14
- from pystac import (
15
- Catalog,
16
- CatalogType,
17
- Collection,
18
- Summaries,
19
- )
14
+ from pystac import Catalog, CatalogType, Collection, Link, Summaries
20
15
  from pystac.layout import TemplateLayoutStrategy
21
16
  from pystac.validation import validate_all
22
17
  from yaml.loader import SafeLoader
23
18
 
24
19
  from eodash_catalog.endpoints import (
25
20
  handle_collection_only,
21
+ handle_custom_endpoint,
26
22
  handle_GeoDB_endpoint,
27
- handle_GeoDB_Tiles_endpoint,
28
23
  handle_SH_endpoint,
29
24
  handle_SH_WMS_endpoint,
30
25
  handle_VEDA_endpoint,
@@ -35,10 +30,12 @@ from eodash_catalog.stac_handling import (
35
30
  add_base_overlay_info,
36
31
  add_collection_information,
37
32
  add_extra_fields,
38
- get_or_create_collection_and_times,
33
+ get_or_create_collection,
39
34
  )
40
35
  from eodash_catalog.utils import (
36
+ Options,
41
37
  RaisingThread,
38
+ add_single_item_if_collection_empty,
42
39
  iter_len_at_least,
43
40
  recursive_save,
44
41
  )
@@ -47,38 +44,28 @@ from eodash_catalog.utils import (
47
44
  load_dotenv()
48
45
 
49
46
 
50
- @dataclass
51
- class Options:
52
- catalogspath: str
53
- collectionspath: str
54
- indicatorspath: str
55
- outputpath: str
56
- vd: bool
57
- ni: bool
58
- tn: bool
59
- collections: list[str]
60
-
61
-
62
47
  def process_catalog_file(file_path: str, options: Options):
63
48
  print("Processing catalog:", file_path)
64
49
  with open(file_path) as f:
65
- config: dict = yaml.load(f, Loader=SafeLoader)
50
+ catalog_config: dict = yaml.load(f, Loader=SafeLoader)
66
51
 
67
52
  if len(options.collections) > 0:
68
53
  # create only catalogs containing the passed collections
69
- process_collections = [c for c in config["collections"] if c in options.collections]
54
+ process_collections = [
55
+ c for c in catalog_config["collections"] if c in options.collections
56
+ ]
70
57
  elif (len(options.collections) == 1 and options.collections == "all") or len(
71
58
  options.collections
72
59
  ) == 0:
73
60
  # create full catalog
74
- process_collections = config["collections"]
61
+ process_collections = catalog_config["collections"]
75
62
  if len(process_collections) == 0:
76
63
  print("No applicable collections found for catalog, skipping creation")
77
64
  return
78
65
  catalog = Catalog(
79
- id=config["id"],
80
- description=config["description"],
81
- title=config["title"],
66
+ id=catalog_config["id"],
67
+ description=catalog_config["description"],
68
+ title=catalog_config["title"],
82
69
  catalog_type=CatalogType.RELATIVE_PUBLISHED,
83
70
  )
84
71
  for collection in process_collections:
@@ -86,13 +73,13 @@ def process_catalog_file(file_path: str, options: Options):
86
73
  if os.path.isfile(file_path):
87
74
  # if collection file exists process it as indicator
88
75
  # collection will be added as single collection to indicator
89
- process_indicator_file(config, file_path, catalog, options)
76
+ process_indicator_file(catalog_config, file_path, catalog, options)
90
77
  else:
91
78
  # if not try to see if indicator definition available
92
79
  file_path = f"{options.indicatorspath}/{collection}.yaml"
93
80
  if os.path.isfile(file_path):
94
81
  process_indicator_file(
95
- config,
82
+ catalog_config,
96
83
  f"{options.indicatorspath}/{collection}.yaml",
97
84
  catalog,
98
85
  options,
@@ -107,29 +94,29 @@ def process_catalog_file(file_path: str, options: Options):
107
94
  print("Started creation of collection files")
108
95
  start = time.time()
109
96
  if options.ni:
110
- catalog_self_href = f'{options.outputpath}/{config["id"]}'
97
+ catalog_self_href = f'{options.outputpath}/{catalog_config["id"]}'
111
98
  catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
112
99
  recursive_save(catalog, options.ni)
113
100
  else:
114
101
  # For full catalog save with items this still seems to be faster
115
- catalog_self_href = config.get(
116
- "endpoint", "{}/{}".format(options.outputpath, config["id"])
102
+ catalog_self_href = catalog_config.get(
103
+ "endpoint", "{}/{}".format(options.outputpath, catalog_config["id"])
117
104
  )
118
105
  catalog.normalize_hrefs(catalog_self_href, strategy=strategy)
119
- catalog.save(dest_href="{}/{}".format(options.outputpath, config["id"]))
106
+ catalog.save(dest_href="{}/{}".format(options.outputpath, catalog_config["id"]))
120
107
  end = time.time()
121
- print(f"Catalog {config['id']}: Time consumed in saving: {end - start}")
108
+ print(f"Catalog {catalog_config['id']}: Time consumed in saving: {end - start}")
122
109
 
123
110
  if options.vd:
124
111
  # try to validate catalog if flag was set
125
112
  print(f"Running validation of catalog {file_path}")
126
113
  try:
127
- validate_all(catalog.to_dict(), href=config["endpoint"])
114
+ validate_all(catalog.to_dict(), href=catalog_config["endpoint"])
128
115
  except Exception as e:
129
116
  print(f"Issue validation collection: {e}")
130
117
 
131
118
 
132
- def extract_indicator_info(parent_collection):
119
+ def extract_indicator_info(parent_collection: Collection):
133
120
  to_extract = [
134
121
  "subcode",
135
122
  "themes",
@@ -139,7 +126,7 @@ def extract_indicator_info(parent_collection):
139
126
  "cities",
140
127
  "countries",
141
128
  ]
142
- summaries = {}
129
+ summaries: dict[str, Any] = {}
143
130
  for key in to_extract:
144
131
  summaries[key] = set()
145
132
 
@@ -166,83 +153,109 @@ def extract_indicator_info(parent_collection):
166
153
  parent_collection.summaries = Summaries(summaries)
167
154
 
168
155
 
169
- def process_indicator_file(config: dict, file_path: str, catalog: Catalog, options: Options):
156
+ def process_indicator_file(
157
+ catalog_config: dict, file_path: str, catalog: Catalog, options: Options
158
+ ):
170
159
  with open(file_path) as f:
171
160
  print("Processing indicator:", file_path)
172
- data: dict = yaml.load(f, Loader=SafeLoader)
173
- parent_indicator, _ = get_or_create_collection_and_times(
174
- catalog, data["Name"], data, config, {}
161
+ indicator_config: dict = yaml.load(f, Loader=SafeLoader)
162
+ parent_indicator = get_or_create_collection(
163
+ catalog, indicator_config["Name"], indicator_config, catalog_config, {}
175
164
  )
176
- if "Collections" in data:
177
- for collection in data["Collections"]:
165
+ if "Collections" in indicator_config:
166
+ for collection in indicator_config["Collections"]:
178
167
  process_collection_file(
179
- config,
168
+ catalog_config,
180
169
  f"{options.collectionspath}/{collection}.yaml",
181
170
  parent_indicator,
182
171
  options,
183
172
  )
184
173
  else:
185
174
  # we assume that collection files can also be loaded directly
186
- process_collection_file(config, file_path, parent_indicator, options)
187
- add_collection_information(config, parent_indicator, data)
175
+ process_collection_file(catalog_config, file_path, parent_indicator, options)
176
+ add_collection_information(catalog_config, parent_indicator, indicator_config)
188
177
  if iter_len_at_least(parent_indicator.get_items(recursive=True), 1):
189
178
  parent_indicator.update_extent_from_items()
190
179
  # Add bbox extents from children
191
180
  for c_child in parent_indicator.get_children():
192
- parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
181
+ if isinstance(c_child, Collection): # typing reason
182
+ parent_indicator.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
193
183
  # extract collection information and add it to summary indicator level
194
184
  extract_indicator_info(parent_indicator)
195
185
  # add baselayer and overview information to indicator collection
196
- add_base_overlay_info(parent_indicator, config, data)
197
- add_to_catalog(parent_indicator, catalog, None, data)
186
+ add_base_overlay_info(parent_indicator, catalog_config, indicator_config)
187
+ add_to_catalog(parent_indicator, catalog, {}, indicator_config)
198
188
 
199
189
 
200
190
  def process_collection_file(
201
- config: dict, file_path: str, catalog: Catalog | Collection, options: Options
191
+ catalog_config: dict, file_path: str, catalog: Catalog | Collection, options: Options
202
192
  ):
203
193
  print("Processing collection:", file_path)
204
194
  with open(file_path) as f:
205
- data: dict = yaml.load(f, Loader=SafeLoader)
206
- if "Resources" in data:
207
- for resource in data["Resources"]:
208
- if "EndPoint" in resource:
209
- collection = None
210
- if resource["Name"] == "Sentinel Hub":
211
- collection = handle_SH_endpoint(config, resource, data, catalog, options)
212
- elif resource["Name"] == "Sentinel Hub WMS":
213
- collection = handle_SH_WMS_endpoint(config, resource, data, catalog)
214
- elif resource["Name"] == "GeoDB":
215
- collection = handle_GeoDB_endpoint(config, resource, data, catalog)
216
- elif resource["Name"] == "VEDA":
217
- collection = handle_VEDA_endpoint(config, resource, data, catalog, options)
218
- elif resource["Name"] == "marinedatastore":
219
- collection = handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
220
- elif resource["Name"] == "xcube":
221
- collection = handle_xcube_endpoint(config, resource, data, catalog)
222
- elif resource["Name"] == "WMS":
223
- collection = handle_WMS_endpoint(config, resource, data, catalog)
224
- elif resource["Name"] == "GeoDB Vector Tiles":
225
- collection = handle_GeoDB_Tiles_endpoint(config, resource, data, catalog)
226
- elif resource["Name"] == "JAXA_WMTS_PALSAR":
227
- # somewhat one off creation of individual WMTS layers as individual items
228
- collection = handle_WMS_endpoint(config, resource, data, catalog, wmts=True)
229
- elif resource["Name"] == "Collection-only":
230
- collection = handle_collection_only(config, resource, data, catalog)
231
- else:
232
- raise ValueError("Type of Resource is not supported")
233
- if collection is not None:
234
- add_to_catalog(collection, catalog, resource, data)
235
- else:
236
- raise Exception("No collection generated")
237
- elif "Subcollections" in data:
195
+ collection_config: dict = yaml.load(f, Loader=SafeLoader)
196
+ if "Resources" in collection_config:
197
+ for resource in collection_config["Resources"]:
198
+ collection = None
199
+ if resource["Name"] == "Sentinel Hub":
200
+ collection = handle_SH_endpoint(
201
+ catalog_config, resource, collection_config, catalog, options
202
+ )
203
+ elif resource["Name"] == "Sentinel Hub WMS":
204
+ collection = handle_SH_WMS_endpoint(
205
+ catalog_config, resource, collection_config, catalog
206
+ )
207
+ elif resource["Name"] == "GeoDB":
208
+ collection = handle_GeoDB_endpoint(
209
+ catalog_config, resource, collection_config, catalog
210
+ )
211
+ elif resource["Name"] == "VEDA":
212
+ collection = handle_VEDA_endpoint(
213
+ catalog_config, resource, collection_config, catalog, options
214
+ )
215
+ elif resource["Name"] == "marinedatastore":
216
+ collection = handle_WMS_endpoint(
217
+ catalog_config, resource, collection_config, catalog, wmts=True
218
+ )
219
+ elif resource["Name"] == "xcube":
220
+ collection = handle_xcube_endpoint(
221
+ catalog_config, resource, collection_config, catalog
222
+ )
223
+ elif resource["Name"] == "WMS":
224
+ collection = handle_WMS_endpoint(
225
+ catalog_config, resource, collection_config, catalog
226
+ )
227
+ elif resource["Name"] == "JAXA_WMTS_PALSAR":
228
+ # somewhat one off creation of individual WMTS layers as individual items
229
+ collection = handle_WMS_endpoint(
230
+ catalog_config, resource, collection_config, catalog, wmts=True
231
+ )
232
+ elif resource["Name"] == "Collection-only":
233
+ collection = handle_collection_only(
234
+ catalog_config, resource, collection_config, catalog
235
+ )
236
+ elif resource["Name"] == "Custom-Endpoint":
237
+ collection = handle_custom_endpoint(
238
+ catalog_config,
239
+ resource,
240
+ collection_config,
241
+ catalog,
242
+ )
243
+ else:
244
+ raise ValueError("Type of Resource is not supported")
245
+ if collection:
246
+ add_single_item_if_collection_empty(collection)
247
+ add_to_catalog(collection, catalog, resource, collection_config)
248
+ else:
249
+ raise Exception(f"No collection was generated for resource {resource}")
250
+ elif "Subcollections" in collection_config:
238
251
  # if no endpoint is specified we check for definition of subcollections
239
- parent_collection, _ = get_or_create_collection_and_times(
240
- catalog, data["Name"], data, config, {}
252
+ parent_collection = get_or_create_collection(
253
+ catalog, collection_config["Name"], collection_config, catalog_config, {}
241
254
  )
242
255
 
243
256
  locations = []
244
257
  countries = []
245
- for sub_coll_def in data["Subcollections"]:
258
+ for sub_coll_def in collection_config["Subcollections"]:
246
259
  # Subcollection has only data on one location which
247
260
  # is defined for the entire collection
248
261
  if "Name" in sub_coll_def and "Point" in sub_coll_def:
@@ -252,7 +265,7 @@ def process_collection_file(
252
265
  else:
253
266
  countries.append(sub_coll_def["Country"])
254
267
  process_collection_file(
255
- config,
268
+ catalog_config,
256
269
  "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
257
270
  parent_collection,
258
271
  options,
@@ -281,7 +294,7 @@ def process_collection_file(
281
294
  # create temp catalog to save collection
282
295
  tmp_catalog = Catalog(id="tmp_catalog", description="temp catalog placeholder")
283
296
  process_collection_file(
284
- config,
297
+ catalog_config,
285
298
  "{}/{}.yaml".format(options.collectionspath, sub_coll_def["Collection"]),
286
299
  tmp_catalog,
287
300
  options,
@@ -299,11 +312,12 @@ def process_collection_file(
299
312
 
300
313
  parent_collection.add_links(links)
301
314
 
302
- add_collection_information(config, parent_collection, data)
315
+ add_collection_information(catalog_config, parent_collection, collection_config)
303
316
  parent_collection.update_extent_from_items()
304
317
  # Add bbox extents from children
305
318
  for c_child in parent_collection.get_children():
306
- parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
319
+ if isinstance(c_child, Collection):
320
+ parent_collection.extent.spatial.bboxes.append(c_child.extent.spatial.bboxes[0])
307
321
  # Fill summaries for locations
308
322
  parent_collection.summaries = Summaries(
309
323
  {
@@ -311,17 +325,19 @@ def process_collection_file(
311
325
  "countries": list(set(countries)),
312
326
  }
313
327
  )
314
- add_to_catalog(parent_collection, catalog, None, data)
328
+ add_to_catalog(parent_collection, catalog, {}, collection_config)
315
329
 
316
330
 
317
- def add_to_catalog(collection, catalog, endpoint, data):
331
+ def add_to_catalog(
332
+ collection: Collection, catalog: Catalog, endpoint: dict, collection_config: dict
333
+ ):
318
334
  # check if already in catalog, if it is do not re-add it
319
335
  # TODO: probably we should add to the catalog only when creating
320
336
  for cat_coll in catalog.get_collections():
321
337
  if cat_coll.id == collection.id:
322
338
  return
323
339
 
324
- link = catalog.add_child(collection)
340
+ link: Link = catalog.add_child(collection)
325
341
  # bubble fields we want to have up to collection link and add them to collection
326
342
  if endpoint and "Type" in endpoint:
327
343
  collection.extra_fields["endpointtype"] = "{}_{}".format(
@@ -335,19 +351,19 @@ def add_to_catalog(collection, catalog, endpoint, data):
335
351
  elif endpoint:
336
352
  collection.extra_fields["endpointtype"] = endpoint["Name"]
337
353
  link.extra_fields["endpointtype"] = endpoint["Name"]
338
- if "Subtitle" in data:
339
- link.extra_fields["subtitle"] = data["Subtitle"]
354
+ if "Subtitle" in collection_config:
355
+ link.extra_fields["subtitle"] = collection_config["Subtitle"]
340
356
  link.extra_fields["title"] = collection.title
341
- link.extra_fields["code"] = data["EodashIdentifier"]
342
- link.extra_fields["id"] = data["Name"]
343
- if "Themes" in data:
344
- link.extra_fields["themes"] = data["Themes"]
357
+ link.extra_fields["code"] = collection_config["EodashIdentifier"]
358
+ link.extra_fields["id"] = collection_config["Name"]
359
+ if "Themes" in collection_config:
360
+ link.extra_fields["themes"] = collection_config["Themes"]
345
361
  # Check for summaries and bubble up info
346
362
  if collection.summaries.lists:
347
363
  for summary in collection.summaries.lists:
348
364
  link.extra_fields[summary] = collection.summaries.lists[summary]
349
365
 
350
- add_extra_fields(link, data)
366
+ add_extra_fields(link, collection_config)
351
367
  return link
352
368
 
353
369
 
@@ -3,8 +3,10 @@ import os
3
3
  from oauthlib.oauth2 import BackendApplicationClient
4
4
  from requests_oauthlib import OAuth2Session
5
5
 
6
+ SH_TOKEN_URL = "https://services.sentinel-hub.com/oauth/token"
6
7
 
7
- def get_SH_token():
8
+
9
+ def get_SH_token() -> str:
8
10
  # Your client credentials
9
11
  client_id = os.getenv("SH_CLIENT_ID")
10
12
  client_secret = os.getenv("SH_CLIENT_SECRET")
@@ -13,7 +15,7 @@ def get_SH_token():
13
15
  oauth = OAuth2Session(client=client)
14
16
  # Get token for the session
15
17
  token = oauth.fetch_token(
16
- token_url="https://services.sentinel-hub.com/oauth/token",
18
+ token_url=SH_TOKEN_URL,
17
19
  client_secret=client_secret,
18
20
  )
19
21