earthcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
earthcode/validator.py ADDED
@@ -0,0 +1,605 @@
1
+ import json
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Dict, List, Any, Optional
6
+ import pystac
7
+ from jsonschema import validate, RefResolver
8
+
9
# Optional dependency: Pillow is only needed for preview-image dimension
# checks. When it is missing, Image is None and those checks are skipped.
try:
    from PIL import Image
except ImportError:
    Image = None
14
+
15
# Names of the fixed top-level sub-catalog folders under the OSC root.
ROOT_CHILDREN = ["eo-missions", "products", "projects", "themes", "variables", "experiments", "workflows"]
# STAC extension shorthand -> schema URL expected in `stac_extensions`.
EXTENSION_SCHEMES = {
    "osc": "https://stac-extensions.github.io/osc/v1.0.0/schema.json",
    "themes": "https://stac-extensions.github.io/themes/v1.0.0/schema.json",
    "contacts": "https://stac-extensions.github.io/contacts/v0.1.1/schema.json",
}
# Public base URL of the published catalog; 'self' links must start with it,
# and _resolve() strips it to map absolute hrefs back to repository paths.
LINK_PREFIX = "https://esa-earthcode.github.io/open-science-catalog-metadata/"
# Scheme identifying OSC theme concepts inside the `themes` array.
THEMES_SCHEME = "https://github.com/stac-extensions/osc#theme"
# Entity-type folder name -> human-readable prefix used in 'related' link titles.
RELATED_TITLE_PREFIX = {
    "projects": "Project",
    "products": "Product",
    "eo-missions": "EO Mission",
    "themes": "Theme",
    "variables": "Variable",
    "workflows": "Workflow",
    "experiments": "Experiment"
}
32
+
33
+
34
+ # --- Common Utilities ---
35
+
36
def _infer_file_path(data: Dict, root: Path) -> Path:
    """Derive the on-disk location of a STAC object inside the OSC tree.

    Raises ValueError when no location can be determined.
    """
    obj_id = data.get("id")
    obj_type = data.get("type")

    # The root catalog and the fixed top-level sub-catalogs have known paths.
    if obj_type == "Catalog":
        if obj_id == "osc":
            return root / "catalog.json"
        if obj_id in ROOT_CHILDREN:
            return root / obj_id / "catalog.json"

    osc_type = data.get("osc:type")

    # Projects and products declare their kind explicitly via osc:type.
    if osc_type == "project":
        return root / "projects" / obj_id / "collection.json"
    if osc_type == "product":
        return root / "products" / obj_id / "collection.json"

    # Remaining entity kinds are located by probing the filesystem.
    probe_locations = (
        ("eo-missions", "catalog.json"),
        ("themes", "catalog.json"),
        ("variables", "catalog.json"),
        ("workflows", "record.json"),
        ("experiments", "record.json"),
    )
    for folder, fname in probe_locations:
        candidate = root / folder / obj_id / fname
        if candidate.exists():
            return candidate

    # A STAC item that belongs to a product collection.
    if data.get('collection') is not None:
        return root / "products" / data.get('collection') / f"{obj_id}.json"

    if osc_type:
        raise ValueError(f"Could not locate file for {osc_type} with id {obj_id}")

    raise ValueError(f"Could not infer file path for object id '{obj_id}'. Ensure file exists in standard OSC structure.")
73
+
74
+
75
+ def _assert(ctx, condition, message):
76
+ if not condition:
77
+ ctx["errors"].append(message)
78
+
79
def _resolve(ctx, href):
    """Resolve *href* against the folder of the file being validated.

    Absolute catalog URLs (starting with LINK_PREFIX) are first translated
    to repository-relative paths.
    """
    if href.startswith(LINK_PREFIX):
        href = href[len(LINK_PREFIX):]
    folder = ctx["file_path"].parent
    return (folder / href).resolve()
85
+
86
+ def _get_title_for_file(path: Path):
87
+ try:
88
+ with open(path, 'r', encoding='utf-8') as f:
89
+ stac = json.load(f)
90
+ if stac.get("type") == "Feature":
91
+ return stac.get("properties", {}).get("title")
92
+ return stac.get("title")
93
+ except (FileNotFoundError, json.JSONDecodeError):
94
+ return None
95
+
96
+ def _get_link_with_rel(data, rel):
97
+ links = data.get("links", [])
98
+ if isinstance(links, list):
99
+ for link in links:
100
+ if link.get("href") and link.get("rel") == rel:
101
+ return link
102
+ return None
103
+
104
def _has_link_with_rel(ctx, rel):
    """Assert that a link with relation *rel* exists; return it (or None)."""
    found = _get_link_with_rel(ctx["data"], rel)
    _assert(ctx, isinstance(found, dict), f"must have {rel} link")
    return found
108
+
109
def _has_extensions(ctx, extensions):
    """Assert that each extension shorthand in *extensions* is declared in stac_extensions."""
    declared = ctx["data"].get("stac_extensions", [])
    if not isinstance(declared, list):
        _assert(ctx, False, f"must implement extensions: {', '.join(extensions)}")
        return
    for ext in extensions:
        schema_url = EXTENSION_SCHEMES.get(ext)
        if not schema_url:
            # Shorthand without a known schema URL is a configuration error.
            _assert(ctx, False, f"Extension definition missing for {ext}")
        else:
            _assert(ctx, schema_url in declared, f"must implement extension: {ext}")
120
+
121
def _ensure_id_is_folder_name(ctx):
    """The object's id must equal the name of the folder containing its file."""
    folder = ctx["file_path"].parent.name
    _assert(ctx, ctx["data"].get("id") == folder, "parent folder name must match id")
124
+
125
def _check_stac_links_rel_abs(ctx, include_item_child=True):
    """Check href conventions: 'self' must be an absolute catalog URL,
    structural links ('related', 'parent', optionally 'item'/'child') relative."""
    relative_rels = ['related', 'parent']
    if include_item_child:
        relative_rels += ['item', 'child']

    for link in ctx["data"].get("links", []):
        rel = link.get("rel")
        href = link.get("href", "")
        if rel == 'self':
            _assert(ctx, href.startswith(LINK_PREFIX), f"Link 'self' must start with '{LINK_PREFIX}'")
        elif rel in relative_rels:
            # Any scheme separator marks the href as absolute.
            _assert(ctx, "://" not in href, f"Link '{rel}' to '{href}' must be relative")
137
+
138
def _check_link_title(ctx, link, prefix=''):
    """Verify that link['title'] matches the title stored in the linked file.

    When *prefix* is given (e.g. "Project: "), the expected title is the
    linked file's title with that prefix prepended.
    """
    href_resolved = _resolve(ctx, link['href'])
    title = _get_title_for_file(href_resolved)

    # Only check when the target file exists and actually carries a string title.
    if isinstance(title, str):
        expected = f"{prefix}{title}" if prefix else title
        # With a prefix the error message quotes the exact expected value;
        # otherwise it refers the reader to the linked file.
        msg = f"'{expected}'" if prefix else f"title of linked file {href_resolved}"
        _assert(ctx, link.get("title") == expected, f"Title of link to {link['href']} (rel: {link['rel']}) must be {msg}")
146
+
147
def _require_parent_link(ctx, expected_path):
    """Convenience wrapper: the 'parent' link must point at *expected_path*."""
    _check_stac_link(ctx, 'parent', expected_path)
149
+
150
def _require_root_link(ctx, expected_path):
    """Convenience wrapper: the 'root' link must point at *expected_path*."""
    _check_stac_link(ctx, 'root', expected_path)
152
+
153
def _require_via_link(ctx):
    """Require a 'via' link (external reference); its target is not checked."""
    _has_link_with_rel(ctx, "via")
155
+
156
def _check_stac_link(ctx, rel_type, expected_path):
    """Check that the *rel_type* link exists, resolves to *expected_path*,
    has the application/json media type, and carries the target's title."""
    link = _has_link_with_rel(ctx, rel_type)
    # _has_link_with_rel already recorded the error when the link is missing.
    if not link: return

    # Compare resolved absolute paths so different relative spellings match.
    res_link = _resolve(ctx, link['href'])
    res_expected = _resolve(ctx, expected_path)

    _assert(ctx, res_link == res_expected, f"{rel_type} link must point to {expected_path}")
    _assert(ctx, link.get("type") == "application/json", f"{rel_type} link must be application/json")
    _check_link_title(ctx, link)
166
+
167
def _check_preview_image(ctx):
    """Validate the 'preview' link: webp media type, null proj:epsg, existing
    file, and proj:shape matching the actual image dimensions."""
    link = _has_link_with_rel(ctx, "preview")
    if not link: return

    _assert(ctx, link.get("type") == "image/webp", "Preview type must be image/webp")
    _assert(ctx, link.get("proj:epsg") is None, "proj:epsg must be null")

    preview_path = _resolve(ctx, link['href'])

    # NOTE(review): when Pillow is not installed but the file exists, the
    # dimension check is silently skipped.
    if Image and preview_path.exists():
        try:
            with Image.open(preview_path) as img:
                w, h = img.size
                # proj:shape is [rows, cols], i.e. [height, width].
                _assert(ctx, link.get("proj:shape") == [h, w], f"proj:shape mismatch for {preview_path}")
        except Exception:
            # Pillow failed to open/decode the file.
            _assert(ctx, False, f"Preview image corrupt: {preview_path}")
    elif not preview_path.exists():
        _assert(ctx, False, f"Preview image doesn't exist: {preview_path}")
185
+
186
def _check_child_links(ctx, expected_type="products", expected_filename="collection"):
    """Every 'child' link must be JSON, point to <expected_type>/<id>/<expected_filename>.json,
    carry the target's title, and its target file must exist."""
    for link in ctx["data"].get("links", []):
        if link.get("rel") != "child":
            continue

        _assert(ctx, link.get("type") == "application/json", f"Link child to {link['href']} type must be json")

        href_path = Path(link['href'])
        # .../<expected_type>/<id>/<file> -> grandparent is the type folder.
        _assert(ctx, href_path.parent.parent.name == expected_type, f"Child link to {link['href']} must point to folder '{expected_type}'")
        _assert(ctx, href_path.name == f"{expected_filename}.json", f"Child link must point to '{expected_filename}.json'")
        _check_link_title(ctx, link)

        resolved = _resolve(ctx, link['href'])
        _assert(ctx, resolved.exists(), f"must have file for link {resolved}")
201
+
202
def _require_child_links_for_other_json(ctx, files_to_check=None, filename="collection", link_rel='child'):
    """Require a *link_rel* link for every child JSON file on disk, and an
    existing file for every such link.

    files_to_check: explicit sub-folder names to require (used for the root
        catalog); when None, the current file's folder is scanned instead.
    filename: base name of the per-child JSON file ("catalog", "collection",
        "record"); when falsy, all *.json files one level down are collected.
    link_rel: relation the links must use ('child' or 'item').
    """
    target_files = []

    if files_to_check:
        # Each entry is a sub-folder of the current directory; prefer
        # catalog.json, fall back to collection.json.
        for f in files_to_check:
            p = ctx["file_path"].parent / f / "catalog.json"
            if not p.exists():
                p = ctx["file_path"].parent / f / "collection.json"

            if p.exists():
                target_files.append(p)
    else:
        # Scan the folder containing the current file for child entries.
        current_folder = ctx["file_path"].parent
        if current_folder.exists():
            for entry in os.scandir(current_folder):
                if entry.is_dir():
                    if filename:
                        # BUG FIX: previously probed the literal file
                        # "(unknown).json", so no child was ever discovered;
                        # use the requested *filename* instead.
                        cand = Path(entry.path) / f"{filename}.json"
                        if cand.exists(): target_files.append(cand)
                    else:
                        for sub in os.scandir(entry.path):
                            if sub.name.endswith(".json"):
                                target_files.append(Path(sub.path))

    links = [l for l in ctx["data"].get("links", []) if l.get("href") and l.get("rel") == link_rel]
    link_hrefs = [_resolve(ctx, l['href']) for l in links]

    for link in links:
        _assert(ctx, link.get("type") == "application/json", f"{link_rel} link type error")
        _check_link_title(ctx, link)

    # Every file on disk needs a corresponding link ...
    for tf in target_files:
        if tf not in link_hrefs:
            _assert(ctx, False, f"must have link with relation {link_rel} to {tf}")

    # ... and every link needs an existing file.
    for lh in link_hrefs:
        if not lh.exists():
            _assert(ctx, False, f"must have file for link {lh}")
247
+
248
def _check_themes(ctx):
    """Validate the `themes` array: the themes extension must be declared,
    an entry with the OSC theme scheme must exist, and every referenced
    theme concept must resolve to an existing theme catalog."""
    themes = ctx["data"].get("themes")
    _assert(ctx, isinstance(themes, list), "'themes' must be an array")
    _has_extensions(ctx, ["themes"])

    if not isinstance(themes, list): return

    theme_obj = next((th for th in themes if th.get("scheme") == THEMES_SCHEME), None)
    _assert(ctx, theme_obj is not None, f"must have theme with scheme '{THEMES_SCHEME}'")
    if not theme_obj: return

    concepts = theme_obj.get("concepts")
    _assert(ctx, isinstance(concepts, list), "concepts must be an array")

    if isinstance(concepts, list):
        for obj in concepts:
            # Each concept id must match an existing theme catalog and be
            # mirrored by a 'related' link on this object.
            theme_path = _resolve(ctx, f"../../themes/{obj['id']}/catalog.json")
            _assert(ctx, theme_path.exists(), f"Referenced theme '{obj['id']}' must exist at {theme_path}")
            _check_related_link(ctx, "themes", obj['id'], "catalog")
267
+
268
def _check_related_link(ctx, type_name, id_val, filename="collection"):
    """Require a 'related' link to <type_name>/<id_val>/<filename>.json with
    JSON media type and a "<Prefix>: <title>" link title."""
    # BUG FIX: the suffix previously contained the literal "(unknown).json",
    # so no related link could ever match; build it from *filename*, which
    # was otherwise unused.
    suffix = f"/{type_name}/{id_val}/{filename}.json"
    link = next((l for l in ctx["data"].get("links", [])
                 if l.get("rel") == "related" and l.get("href", "").endswith(suffix)), None)

    _assert(ctx, link is not None, f"must have 'related' link to {type_name} with id '{id_val}'")

    if link:
        _assert(ctx, link.get("type") == "application/json", "related link type must be json")
        # Titles of related links carry an entity-type prefix, e.g. "Theme: X".
        prefix = RELATED_TITLE_PREFIX.get(type_name, "") + ": "
        _check_link_title(ctx, link, prefix)
279
+
280
def _check_osc_cross_ref_array(ctx, field, type_name, required=False):
    """Validate every entry of the list stored under *field* as a cross-reference."""
    values = ctx["data"].get(field)
    if required:
        _assert(ctx, isinstance(values, list), f"'{field}' must be array")
    if not isinstance(values, list):
        return
    for value in values:
        # Each listed id is individually required to resolve.
        _check_osc_cross_ref(ctx, value, type_name, True)
287
+
288
def _check_osc_cross_ref(ctx, value, type_name, required=False):
    """Verify a cross-reference to another OSC entity: the target file must
    exist and a matching 'related' link must be present."""
    if not value and not required:
        return

    # The entity kind determines the per-entry file name.
    if type_name in ("projects", "products"):
        fname = "collection"
    elif type_name in ("experiments", "workflows"):
        fname = "record"
    else:
        fname = "catalog"

    path_ref = _resolve(ctx, f"../../{type_name}/{value}/{fname}.json")
    _assert(ctx, path_ref.exists(), f"Referenced {type_name} '{value}' must exist")
    _check_related_link(ctx, type_name, value, fname)
298
+
299
def _require_technical_officer(ctx):
    """Require a contact with the 'technical_officer' role carrying a name and an email."""
    contacts = ctx["data"].get("contacts")
    _assert(ctx, isinstance(contacts, list), "must have contacts")
    if not isinstance(contacts, list):
        return

    officer = None
    for contact in contacts:
        if "technical_officer" in contact.get("roles", []):
            officer = contact
            break

    if officer is None:
        _assert(ctx, False, "must have technical officer contact")
        return

    _assert(ctx, len(officer.get("name", "")) > 1, "tech officer must have name")
    emails = officer.get("emails", [])
    # The first email entry must carry a non-trivial value.
    _assert(ctx, len(emails) > 0 and len(emails[0].get("value", "")) > 1, "tech officer must have email")
311
+
312
def _no_duplicated_links(ctx):
    """Assert that no (rel, title, href) triple occurs more than once in links."""
    # .get() avoids a KeyError on malformed links missing 'rel' or 'href';
    # message typo fixed ("duplciated" -> "duplicated").
    links = [(l.get('rel'), l.get('title', ''), l.get('href')) for l in ctx['data'].get("links", [])]
    _assert(ctx, len(links) == len(set(links)), "There should be no duplicated links")
315
+
316
def _validate_project(ctx):
    """Validate a project collection (projects/<id>/collection.json)."""
    data = ctx["data"]
    _assert(ctx, data.get("type") == "Collection", "type must be 'Collection'")

    _ensure_id_is_folder_name(ctx)
    # Projects must reference their external source via a 'via' link.
    _require_via_link(ctx)

    # Structural links within the catalog tree.
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    _check_child_links(ctx)
    _check_stac_links_rel_abs(ctx)


    _assert(ctx, data.get("osc:type") == "project", "'osc:type' must be 'project'")

    # Optional cross-references to workflows owned by this project.
    _check_osc_cross_ref_array(ctx, "osc:workflows", "workflows")

    _check_themes(ctx)

    _has_extensions(ctx, ["osc", "contacts"])
    _require_technical_officer(ctx)
338
+
339
+
340
+
341
def _validate_product(ctx):
    """Validate a product collection (products/<id>/collection.json)."""
    data = ctx["data"]
    _assert(ctx, data.get("type") == "Collection", "type must be 'Collection'")
    _has_extensions(ctx, ["osc"])
    _ensure_id_is_folder_name(ctx)
    _require_via_link(ctx)

    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    # item/child hrefs are not constrained for products.
    _check_stac_links_rel_abs(ctx, include_item_child=False)

    _assert(ctx, data.get("osc:type") == "product", "'osc:type' must be 'product'")
    _assert(ctx, isinstance(data.get("osc:project"), str), "'osc:project' must be a string")

    # Mandatory owning project, plus optional cross-references.
    _check_osc_cross_ref(ctx, data.get("osc:project"), "projects", required=True)
    _check_osc_cross_ref_array(ctx, "osc:variables", "variables")
    _check_osc_cross_ref_array(ctx, "osc:missions", "eo-missions")
    _check_osc_cross_ref(ctx, data.get("osc:experiment"), "experiments")

    _check_themes(ctx)
361
+
362
+
363
def _validate_root(ctx):
    """Validate the root OSC catalog (catalog.json at the repository root)."""
    if ctx["data"].get("type") != "Catalog":
        ctx["errors"].append("type must be 'Catalog'")
    if ctx["data"].get("id") != "osc":
        ctx["errors"].append("id must be 'osc'")
    if ctx["data"].get("title") != "Open Science Catalog":
        ctx["errors"].append("title must be 'Open Science Catalog'")

    # The root is its own root.
    _require_root_link(ctx, "./catalog.json")

    # Being the top of the tree, a parent link is forbidden.
    if _get_link_with_rel(ctx["data"], 'parent'):
        ctx["errors"].append("must NOT have a parent")

    _check_stac_links_rel_abs(ctx)
    # Must link to each of the fixed top-level sub-catalogs.
    _require_child_links_for_other_json(ctx, ROOT_CHILDREN)
378
+
379
def _validate_sub_catalogs(ctx, child_stac_type):
    """Validate a top-level sub-catalog (e.g. products/catalog.json).

    child_stac_type: STAC type of the entries below this catalog
        ('Catalog', 'Collection' or 'Record'); lower-cased it is the
        per-child file name, and it selects the link relation used.
    """
    if ctx["data"].get("type") != "Catalog":
        ctx["errors"].append("type must be 'Catalog'")

    _ensure_id_is_folder_name(ctx)
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../catalog.json")
    _check_stac_links_rel_abs(ctx)

    # Records are linked as items; catalogs/collections as children.
    rel_type = 'item' if child_stac_type == 'Record' else 'child'
    _require_child_links_for_other_json(ctx, None, child_stac_type.lower(), rel_type)
390
+
391
+
392
def _validate_eo_mission(ctx):
    """Validate an EO mission catalog (eo-missions/<id>/catalog.json)."""
    if ctx["data"].get("type") != "Catalog":
        ctx["errors"].append("type must be 'Catalog'")

    _ensure_id_is_folder_name(ctx)
    _require_via_link(ctx)
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    # Children are product collections (the _check_child_links defaults).
    _check_child_links(ctx)
    _check_stac_links_rel_abs(ctx)
402
+
403
def _validate_theme(ctx):
    """Validate a theme catalog (themes/<id>/catalog.json)."""
    if ctx["data"].get("type") != "Catalog":
        ctx["errors"].append("type must be 'Catalog'")

    _ensure_id_is_folder_name(ctx)
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    _check_child_links(ctx)
    _check_stac_links_rel_abs(ctx)
    # Themes additionally carry a preview image.
    _check_preview_image(ctx)
413
+
414
def _validate_variable(ctx):
    """Validate a variable catalog (variables/<id>/catalog.json)."""
    if ctx["data"].get("type") != "Catalog":
        ctx["errors"].append("type must be 'Catalog'")

    _has_extensions(ctx, ["themes"])
    _ensure_id_is_folder_name(ctx)
    _require_via_link(ctx)
    _require_parent_link(ctx, "../catalog.json")
    _require_root_link(ctx, "../../catalog.json")
    _check_child_links(ctx)
    _check_stac_links_rel_abs(ctx)
    _check_themes(ctx)
426
+
427
def _validate_workflow(ctx):
    """Validate a workflow record (workflows/<id>/record.json).

    TODO(review): currently a no-op; the intended rules are kept below as a
    commented-out sketch.
    """
    pass
    # data = ctx["data"]
    # if data.get("type") != "Feature":
    #     ctx["errors"].append("type must be 'Feature'")

    # _ensure_id_is_folder_name(ctx)
    # _require_parent_link(ctx, "../catalog.json")
    # _require_root_link(ctx, "../../catalog.json")
    # _check_child_links(ctx, "experiments", "record")
    # _check_stac_links_rel_abs(ctx, False)

    # props = data.get("properties", {})
    # if not isinstance(props.get("osc:project"), str):
    #     ctx["errors"].append("'osc:project' must be a string")

    # _check_osc_cross_ref(ctx, props.get("osc:project"), "projects", True)
444
+
445
def _validate_experiment(ctx):
    """Validate an experiment record (experiments/<id>/record.json).

    TODO(review): currently a no-op; the intended rules are kept below as a
    commented-out sketch.
    """
    pass
    # data = ctx["data"]
    # if data.get("type") != "Feature":
    #     ctx["errors"].append("type must be 'Feature'")

    # _ensure_id_is_folder_name(ctx)
    # _require_parent_link(ctx, "../catalog.json")
    # _require_root_link(ctx, "../../catalog.json")
    # _check_child_links(ctx)
    # _check_stac_links_rel_abs(ctx, False)

    # props = data.get("properties", {})
    # if not isinstance(props.get("osc:workflow"), str):
    #     ctx["errors"].append("'osc:workflow' must be a string")

    # _check_osc_cross_ref(ctx, props.get("osc:workflow"), "workflows", True)

    # _has_link_with_rel(ctx, "environment")
    # _has_link_with_rel(ctx, "input")
465
+
466
+
467
def _validate_relative_schema(ctx, schema_file):
    """Validate the file behind ctx['file_path'] against a JSON schema shipped
    next to this module (*schema_file* is relative to the package folder)."""
    schema_file = Path(__file__).resolve().parent / schema_file
    with open(schema_file, 'r', encoding='utf-8') as f:
        schema = json.load(f)

    with open(ctx['file_path'], 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Base URI of the schema folder so relative $refs resolve to sibling files.
    base_uri = Path(schema_file).absolute().parent.as_uri() + "/"
    # NOTE(review): RefResolver is deprecated in jsonschema>=4.18; consider
    # migrating to the `referencing` library.
    resolver = RefResolver(base_uri=base_uri, referrer=schema)
    try:
        validate(instance=data, schema=schema, resolver=resolver)
    except Exception as e:
        # BUG FIX: store the message, not the exception object — callers
        # treat ctx['errors'] as a list of strings (List[str]).
        ctx['errors'].append(str(e))
483
+
484
+
485
#TODO: Implement Item checks
def _validate_user_content(ctx):
    """Placeholder for validating user-contributed product items; currently a no-op."""
    pass
488
+
489
def validateOSCEntry(data: dict, catalog_root: Path) -> List[str]:
    """
    Validates a STAC project (or catalog/collection) against OSC rules.

    data: the parsed STAC JSON object to validate.
    catalog_root: path to the root of the Open Science Catalog checkout.
    Returns a list of human-readable error strings (empty when valid).
    """

    errors = []
    catalog_root = Path(catalog_root).resolve()

    # infer the objects's location in the OSC structure; without a location
    # no further checks are possible.
    try:
        file_path = _infer_file_path(data, catalog_root)
    except ValueError as e:
        errors.append(str(e))
        return errors


    # Generate validation context: classify the entry by its position in the
    # tree and its file name.
    rel_path = "/" + file_path.relative_to(catalog_root).as_posix()

    is_root_catalog = rel_path.endswith("/catalog.json") and data.get("id") == "osc"
    is_eo_mission = "/eo-missions/" in rel_path and rel_path.endswith("/catalog.json")
    is_product = "/products/" in rel_path and rel_path.endswith("/collection.json")
    is_project = "/projects/" in rel_path and rel_path.endswith("/collection.json")
    is_theme = "/themes/" in rel_path and rel_path.endswith("/catalog.json")
    is_variable = "/variables/" in rel_path and rel_path.endswith("/catalog.json")
    is_workflow = "/workflows/" in rel_path and rel_path.endswith("/record.json")
    is_experiment = "/experiments/" in rel_path and rel_path.endswith("/record.json")

    # Top-level sub-catalogs: e.g. "/products/catalog.json" (no id segment).
    is_sub_catalog_root = bool(re.search(r"/(eo-missions|products|projects|themes|variables|workflows|experiments)/catalog\.json", rel_path))

    # Context object to pass around to all check helpers.
    ctx = {
        "data": data,
        "file_path": file_path,
        "root": catalog_root,
        "errors": errors
    }

    # do General Checks (records are exempt from the stac_version check).
    if (data.get("stac_version") not in ["1.0.0", "1.1.0"]) and not(is_experiment or is_workflow):
        errors.append("stac_version must be '1.0.0' or '1.1.0'")

    if data.get("type") in ["Catalog", "Collection"]:
        title = data.get("title")
        if not (isinstance(title, str) and len(title) > 0):
            errors.append("must have a title")

    # _no_duplicated_links(ctx)

    # call specific validation function; sub-catalog roots are checked first
    # because e.g. "/products/catalog.json" would also match is_root_catalog's
    # suffix test otherwise.
    if is_sub_catalog_root:

        # The folder name determines the STAC type of the entries below.
        child_entity = Path(rel_path).parent.name
        child_stac_type = 'Catalog'
        if child_entity in ['products', 'projects']:
            child_stac_type = 'Collection'
        elif child_entity in ['workflows', 'experiments']:
            child_stac_type = 'Record'
        _validate_sub_catalogs(ctx, child_stac_type)

    elif is_root_catalog:
        # validate schema
        schema_path = 'schemas/catalog.json'
        _validate_relative_schema(ctx, schema_path)
        #validate custom rules
        _validate_root(ctx)
    elif is_eo_mission:
        _validate_relative_schema(ctx, 'schemas/eo-missions/children.json')
        _validate_eo_mission(ctx)
    elif is_product:
        _validate_relative_schema(ctx, 'schemas/products/children.json')
        _validate_product(ctx)
    elif is_project:
        _validate_relative_schema(ctx, 'schemas/projects/children.json')
        _validate_project(ctx)
    elif is_theme:
        _validate_relative_schema(ctx, 'schemas/themes/children.json')
        _validate_theme(ctx)
    elif is_variable:
        _validate_relative_schema(ctx, 'schemas/variables/children.json')
        _validate_variable(ctx)
    elif is_workflow:
        _validate_relative_schema(ctx, 'schemas/workflows/children.json')
        _validate_workflow(ctx)
    elif is_experiment:
        _validate_relative_schema(ctx, 'schemas/experiments/children.json')
        _validate_experiment(ctx)
    else:
        ## TODO: add users updates
        # Product STAC items (products/<collection>/<item>.json) fall through
        # to the user-content placeholder.
        if "/products/" in rel_path:
            _validate_user_content(ctx)
        else:
            errors.append(f"Validation context could not be determined for path: {rel_path}")

    return errors
584
+
585
def validate_catalog(root_path):
    """Walk *root_path* recursively and validate every JSON file as an OSC entry.

    root_path: path to the root of an Open Science Catalog checkout.
    Returns a tuple (errors, error_files): one error list per failing file,
    and the corresponding file paths.
    """
    root = Path(root_path).resolve()
    if not root.exists():
        # BUG FIX: previously only printed and fell through to an empty walk;
        # return explicitly so the message and the result stay consistent.
        print(f"Error: Path {root} does not exist.")
        return [], []

    errors = []
    error_files = []

    # Recursive walk over every JSON file in the tree.
    for current_dir, _, files in os.walk(root):
        for file in files:
            if not file.endswith(".json"):
                continue
            full_path = Path(current_dir) / file
            try:
                with open(full_path, 'r', encoding='utf-8') as f:
                    stac_object = json.load(f)
            except (OSError, json.JSONDecodeError) as e:
                # Robustness: a single malformed file is reported as an
                # error instead of aborting the whole walk.
                file_errors = [f"Invalid JSON: {e}"]
            else:
                file_errors = validateOSCEntry(stac_object, root)
            if file_errors:
                errors.append(file_errors)
                error_files.append(full_path)

    return errors, error_files
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: earthcode
3
+ Version: 0.1.0
4
+ Summary: Tools for creating, validating, and searching EarthCODE Open Science Catalog metadata.
5
+ Project-URL: Homepage, https://github.com/ESA-EarthCODE/earthcode-library
6
+ Project-URL: Repository, https://github.com/ESA-EarthCODE/earthcode-library
7
+ Project-URL: Issues, https://github.com/ESA-EarthCODE/earthcode-library/issues
8
+ Project-URL: Documentation, https://esa-earthcode.github.io/earthcode-library/
9
+ Author-email: Krasen Samardzhiev <krasensam@gmail.com>, Deyan Samardzhiev <dean@lampata.co.uk>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: earth observation,earthcode,geospatial
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Requires-Python: >=3.12
20
+ Requires-Dist: bottleneck<2,>=1.6.0
21
+ Requires-Dist: dask<2026,>=2025.12.0
22
+ Requires-Dist: fastembed<0.8,>=0.7.4
23
+ Requires-Dist: fsspec<2026,>=2025.10.0
24
+ Requires-Dist: geopandas<2,>=1.1.2
25
+ Requires-Dist: jsonschema<5,>=4.25.1
26
+ Requires-Dist: netcdf4<2,>=1.7.3
27
+ Requires-Dist: numpy<3,>=2.4.1
28
+ Requires-Dist: pandas<4,>=3.0.0
29
+ Requires-Dist: pillow<12,>=10.3
30
+ Requires-Dist: pyarrow<24,>=23.0.0
31
+ Requires-Dist: pydantic<3,>=2.12.5
32
+ Requires-Dist: pylance<0.25,>=0.24.0
33
+ Requires-Dist: pystac<2,>=1.14.1
34
+ Requires-Dist: requests<3,>=2.32.5
35
+ Requires-Dist: rioxarray<0.21,>=0.20.0
36
+ Requires-Dist: shapely<3,>=2.1.2
37
+ Requires-Dist: xarray<2026,>=2025.12.0
38
+ Requires-Dist: xstac<2,>=1.2.0
39
+ Requires-Dist: zarr<4,>=3.1.5
40
+ Provides-Extra: dev
41
+ Requires-Dist: jupyter-book<2,>=1.0.4; extra == 'dev'
42
+ Requires-Dist: jupyterlab<5,>=4.5.0; extra == 'dev'
43
+ Requires-Dist: papermill<3,>=2.7.0; extra == 'dev'
44
+ Requires-Dist: pytest-cov<8,>=7.0.0; extra == 'dev'
45
+ Requires-Dist: pytest<10,>=9.0.2; extra == 'dev'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # earthcode
49
+
50
+ Python tools for creating, validating, and searching EarthCODE Open Science Catalog metadata.
51
+
52
+ ```bash
53
+ pip install earthcode
54
+ ```
55
+
56
+ ## Development
57
+
58
+ To run:
59
+
60
+ 1. `git clone https://github.com/ESA-EarthCODE/earthcode-library.git`
61
+ 2. Install pixi - https://pixi.sh/dev/installation/
62
+ 3. `cd earthcode-library`
63
+ 4. `pixi install`
64
+ 5. `pixi run jupyter lab`
65
+
66
+ We have examples for:
67
+ - `./examples/example_create_osc_entries.ipynb` - shows how to create OSC entries
68
+ - `./examples/contribute_via_pr_osc.ipynb` - shows how to add newly created entries to the OSC, using a GitHub pull request
69
+ - `./examples/contribute_via_osc_editor.ipynb` - shows how to add entries to the OSC, using a combination of this library and the OSC Editor (a GUI tool)
70
+ - `./examples/earthcode_publishing_guide.ipynb` - is a simplified introduction to the OSC and the necessary steps to publish data
@@ -0,0 +1,12 @@
1
+ earthcode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ earthcode/fairtool.py,sha256=ZNZAu7zrIFqiq4Z9jy-Dvfa_fdj_rrShyzW_DN_Gx5g,19374
3
+ earthcode/git_add.py,sha256=BgcK5-MmEZu4-tuIz0qroNOyW9urtl_UcGMyRiCBmZQ,15030
4
+ earthcode/gitclerk_add.py,sha256=uy1jtjSjb88hWe6hQpAMQ4Ny3u9oYArnbKnYg3nYpwY,926
5
+ earthcode/metadata_input_definitions.py,sha256=sMZq21YMSUXVVaOnojIybhSYPobBxJ8mMIt0tClojp0,11140
6
+ earthcode/search.py,sha256=9GoYf43ZjEhkbTEYW_YE2ctAL2Iz8WS9MRr9C6QLGyE,6951
7
+ earthcode/static.py,sha256=hbMFRT6uSfUtfWagObDxsVhMGF31isVmKb4ZnMY1ttw,22772
8
+ earthcode/validator.py,sha256=o1MgdJIVBtf4AmT8eLEV6cDPpGwZL-5ET_a149BfLJw,22721
9
+ earthcode-0.1.0.dist-info/METADATA,sha256=GAz6kYVaS1VEKG-FI2atwpJxwADHsW7QxVqA5v95pno,2854
10
+ earthcode-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
11
+ earthcode-0.1.0.dist-info/licenses/LICENSE,sha256=wAarzwK43-0P70SMlskTyYHf57V3gV-0TcJ-AGFjedQ,1066
12
+ earthcode-0.1.0.dist-info/RECORD,,