esgvoc 1.0.1__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (41):
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +0 -6
  3. esgvoc/api/data_descriptors/__init__.py +6 -0
  4. esgvoc/api/data_descriptors/archive.py +5 -0
  5. esgvoc/api/data_descriptors/citation_url.py +5 -0
  6. esgvoc/api/data_descriptors/experiment.py +2 -2
  7. esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
  8. esgvoc/api/data_descriptors/regex.py +5 -0
  9. esgvoc/api/data_descriptors/vertical_label.py +2 -2
  10. esgvoc/api/project_specs.py +48 -130
  11. esgvoc/api/projects.py +104 -63
  12. esgvoc/apps/drs/generator.py +47 -42
  13. esgvoc/apps/drs/validator.py +22 -38
  14. esgvoc/apps/jsg/json_schema_generator.py +252 -136
  15. esgvoc/apps/jsg/templates/template.jinja +249 -0
  16. esgvoc/apps/test_cv/README.md +214 -0
  17. esgvoc/apps/test_cv/cv_tester.py +1368 -0
  18. esgvoc/apps/test_cv/example_usage.py +216 -0
  19. esgvoc/apps/vr/__init__.py +12 -0
  20. esgvoc/apps/vr/build_variable_registry.py +71 -0
  21. esgvoc/apps/vr/example_usage.py +60 -0
  22. esgvoc/apps/vr/vr_app.py +333 -0
  23. esgvoc/cli/config.py +671 -86
  24. esgvoc/cli/drs.py +39 -21
  25. esgvoc/cli/main.py +2 -0
  26. esgvoc/cli/test_cv.py +257 -0
  27. esgvoc/core/constants.py +10 -7
  28. esgvoc/core/data_handler.py +24 -22
  29. esgvoc/core/db/connection.py +7 -0
  30. esgvoc/core/db/project_ingestion.py +34 -9
  31. esgvoc/core/db/universe_ingestion.py +1 -2
  32. esgvoc/core/service/configuration/setting.py +192 -21
  33. esgvoc/core/service/data_merger.py +1 -1
  34. esgvoc/core/service/state.py +18 -2
  35. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -1
  36. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/RECORD +40 -29
  37. esgvoc/apps/jsg/cmip6_template.json +0 -74
  38. /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
  39. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
  40. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
  41. {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,1368 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CV Testing Application for ESGVoc
4
+
5
+ This application allows testing of project CVs and Universe CVs with support for:
6
+ - Custom repository URLs and branches via CLI options and environment variables
7
+ - Universe branch override for testing against different WCRP-universe versions
8
+ - Validation of repository structure and content
9
+ - Testing YAML specification files (project_specs.yaml, drs_specs.yaml, catalog_spec.yaml, attr_specs.yaml)
10
+ - Testing esgvoc API integration with CV repositories
11
+ - Support for all available default projects: cmip6, cmip6plus, input4mip, obs4mip, cordex-cmip6
12
+ - Rich CLI interface integrated with esgvoc CLI
13
+ - Environment variable support for CI/CD integration
14
+ - Automatic repository path detection for synchronized CVs
15
+ """
16
+
17
+ import json
18
+ import os
19
+ import sys
20
+ from pathlib import Path
21
+ from typing import List
22
+
23
+ from pydantic import ValidationError
24
+ from rich.console import Console
25
+
26
+ import esgvoc.core.service as service
27
+ from esgvoc.core.service.configuration.setting import (
28
+ ServiceSettings,
29
+ )
30
+ from esgvoc.core.service.state import StateService
31
+
32
+ console = Console()
33
+
34
+
35
def detect_project_name() -> str:
    """
    Try to auto-detect project name from current directory or environment.
    Falls back to a reasonable default for testing.
    """
    # An explicit PROJECT_NAME environment variable always wins.
    from_env = os.environ.get("PROJECT_NAME")
    if from_env:
        return from_env.lower()

    # Substring patterns that identify each known project in a folder name.
    known_patterns = {
        "obs4mips": ["obs4mips", "obs4mip"],
        "input4mips": ["input4mips", "input4mip"],
        "cmip6": ["cmip6"],
        "cmip6plus": ["cmip6plus", "cmip6+"],
        "cordex-cmip6": ["cordex-cmip6", "cordex", "cordexcmip6"],
    }

    # Scan the current directory name first, then every ancestor directory,
    # returning the first project whose pattern matches.
    here = Path.cwd()
    for candidate in (here, *here.parents):
        folder = candidate.name.lower()
        for project, patterns in known_patterns.items():
            if any(pattern in folder for pattern in patterns):
                return project

    # Nothing matched: fall back to a default suitable for testing.
    console.print("[yellow]⚠️ Could not auto-detect project, using 'obs4mip' as default[/yellow]")
    return "obs4mip"
72
+
73
+
74
+ class CVTester:
75
+ """Main CV testing class"""
76
+
77
+ def __init__(self, debug_missing_terms: bool = True):
78
+ self.original_config_name = None
79
+ self.test_config_name = "test_cv_temp"
80
+ self.config_manager = None
81
+ self.debug_missing_terms = debug_missing_terms
82
+
83
+ def get_available_projects(self) -> List[str]:
84
+ """Get list of all available project CVs"""
85
+ return list(ServiceSettings.DEFAULT_PROJECT_CONFIGS.keys())
86
+
87
+ def configure_for_testing(
88
+ self,
89
+ project_name: str = None,
90
+ repo_url: str = None,
91
+ branch: str = None,
92
+ esgvoc_branch: str = None,
93
+ universe_branch: str = None,
94
+ ) -> bool:
95
+ """
96
+ Configure esgvoc with custom or default CV settings for testing
97
+
98
+ Args:
99
+ project_name: Name of the project to test (required)
100
+ repo_url: Custom repository URL (optional - uses default if not provided)
101
+ branch: Custom branch (optional - uses default if not provided)
102
+ esgvoc_branch: ESGVoc library branch (for info only)
103
+ universe_branch: Custom universe branch (optional - uses 'esgvoc' if not provided)
104
+
105
+ Returns:
106
+ bool: True if configuration was successful
107
+ """
108
+ try:
109
+ # Get config manager and store original active configuration
110
+ self.config_manager = service.get_config_manager()
111
+ self.original_config_name = self.config_manager.get_active_config_name()
112
+
113
+ console.print(f"[blue]Current active configuration: {self.original_config_name}[/blue]")
114
+
115
+ # Determine project configuration
116
+ if project_name not in self.get_available_projects():
117
+ available = ", ".join(self.get_available_projects())
118
+ console.print(f"[red]❌ Unknown project '{project_name}'. Available projects: {available}[/red]")
119
+ return False
120
+
121
+ # Use custom repo/branch if provided, otherwise use defaults
122
+ if repo_url or branch:
123
+ # Custom configuration
124
+ default_config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name]
125
+ project_config = {
126
+ "project_name": project_name,
127
+ "github_repo": repo_url or default_config["github_repo"],
128
+ "branch": branch or default_config["branch"],
129
+ "local_path": default_config["local_path"],
130
+ "db_path": default_config["db_path"],
131
+ }
132
+ console.print(f"[blue]Using custom configuration for {project_name}:[/blue]")
133
+ console.print(f" Repository: {project_config['github_repo']}")
134
+ console.print(f" Branch: {project_config['branch']}")
135
+ else:
136
+ # Default configuration
137
+ project_config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name].copy()
138
+ console.print(f"[blue]Using default configuration for {project_name}[/blue]")
139
+
140
+ # Create temporary test configuration with universe and single project
141
+ test_config_data = {
142
+ "universe": {
143
+ "github_repo": "https://github.com/WCRP-CMIP/WCRP-universe",
144
+ "branch": universe_branch or "esgvoc",
145
+ "local_path": "repos/WCRP-universe",
146
+ "db_path": "dbs/universe.sqlite",
147
+ },
148
+ "projects": [project_config],
149
+ }
150
+
151
+ # Remove existing test config if it exists
152
+ configs = self.config_manager.list_configs()
153
+ if self.test_config_name in configs:
154
+ console.print(f"[yellow]Removing existing test configuration: {self.test_config_name}[/yellow]")
155
+ self.config_manager.remove_config(self.test_config_name)
156
+
157
+ # Create new test configuration
158
+ console.print(f"[blue]Creating temporary test configuration: {self.test_config_name}[/blue]")
159
+ console.print(f"[dim]Debug: Test config data projects: {test_config_data['projects']}[/dim]")
160
+ self.config_manager.add_config(self.test_config_name, test_config_data)
161
+
162
+ # Switch to test configuration
163
+ self.config_manager.switch_config(self.test_config_name)
164
+ console.print(f"[green]✅ Switched to test configuration: {self.test_config_name}[/green]")
165
+
166
+ # CRITICAL FIX: Update the data_config_dir after switching configurations
167
+ # This is the root cause - data_config_dir is set once and never updated
168
+ self.config_manager.data_config_dir = self.config_manager.data_dir / self.test_config_name
169
+ self.config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
170
+ console.print(f"[dim]Debug: Updated data_config_dir to: {self.config_manager.data_config_dir}[/dim]")
171
+
172
+ # Clear any potential caches in the config manager
173
+ if hasattr(self.config_manager, "_cached_config"):
174
+ self.config_manager._cached_config = None
175
+ if hasattr(self.config_manager, "cache"):
176
+ self.config_manager.cache.clear()
177
+
178
+ # Create fresh StateService with the updated configuration and directory
179
+ fresh_config = self.config_manager.get_config(self.test_config_name)
180
+ service.current_state = service.StateService(fresh_config)
181
+ console.print(f"[dim]Debug: Created fresh StateService for {self.test_config_name}[/dim]")
182
+
183
+ # Debug: Verify the fix worked
184
+ console.print(
185
+ f"[dim]Debug: StateService universe base_dir: {service.current_state.universe.base_dir}[/dim]"
186
+ )
187
+ console.print(
188
+ f"[dim]Debug: StateService universe local_path: {service.current_state.universe.local_path}[/dim]"
189
+ )
190
+
191
+ if esgvoc_branch:
192
+ console.print(f"[dim]Using esgvoc library from branch: {esgvoc_branch}[/dim]")
193
+
194
+ return True
195
+
196
+ except Exception as e:
197
+ console.print(f"[red]❌ Configuration failed: {e}[/red]")
198
+ import traceback
199
+
200
+ console.print(traceback.format_exc())
201
+ return False
202
+
203
+ def synchronize_cvs(self) -> bool:
204
+ """Synchronize/download the configured CVs"""
205
+ try:
206
+ console.print("[blue]Synchronizing CVs...[/blue]")
207
+
208
+ # Force refresh the state service to ensure it uses the correct configuration
209
+ service.current_state = service.get_state()
210
+
211
+ # Debug: Show what configuration the state service is using
212
+ config_manager = service.get_config_manager()
213
+ active_config = config_manager.get_active_config_name()
214
+ console.print(f"[dim]Debug: Active config during sync: {active_config}[/dim]")
215
+ console.print(f"[dim]Debug: Expected config: {self.test_config_name}[/dim]")
216
+ console.print(f"[dim]Debug: Data config dir during sync: {config_manager.data_config_dir}[/dim]")
217
+
218
+ if active_config != self.test_config_name:
219
+ console.print(
220
+ f"[yellow]⚠️ Warning: Active config mismatch, forcing switch to {self.test_config_name}[/yellow]"
221
+ )
222
+ config_manager.switch_config(self.test_config_name)
223
+
224
+ # Update data_config_dir after forced switch
225
+ config_manager.data_config_dir = config_manager.data_dir / self.test_config_name
226
+ config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
227
+
228
+ # Clear caches again after forced switch
229
+ if hasattr(config_manager, "_cached_config"):
230
+ config_manager._cached_config = None
231
+ if hasattr(config_manager, "cache"):
232
+ config_manager.cache.clear()
233
+
234
+ # Create fresh StateService with correct configuration
235
+ fresh_config = config_manager.get_config(self.test_config_name)
236
+ service.current_state = StateService(fresh_config)
237
+ console.print(f"[dim]Debug: Recreated StateService for {self.test_config_name}[/dim]")
238
+
239
+ service.current_state.synchronize_all()
240
+ console.print("[green]✅ CVs synchronized successfully[/green]")
241
+ return True
242
+ except Exception as e:
243
+ console.print(f"[red]❌ CV synchronization failed: {e}[/red]")
244
+ import traceback
245
+
246
+ console.print(traceback.format_exc())
247
+ return False
248
+
249
+ def test_repository_structure(self, repo_path: str = ".") -> bool:
250
+ """
251
+ Test repository structure and file requirements
252
+
253
+ Args:
254
+ repo_path: Path to the repository to test (default: current directory)
255
+
256
+ Returns:
257
+ bool: True if all tests pass
258
+ """
259
+ console.print(f"[blue]🧪 Testing repository structure in: {repo_path}[/blue]")
260
+
261
+ repo_dir = Path(repo_path)
262
+ if not repo_dir.exists():
263
+ console.print(f"[red]❌ Repository path does not exist: {repo_path}[/red]")
264
+ return False
265
+
266
+ errors = []
267
+ warnings = []
268
+
269
+ # Get all directories
270
+ all_directories = [p for p in repo_dir.iterdir() if p.is_dir()]
271
+
272
+ # Identify collection directories by presence of .jsonld files
273
+ collection_directories = []
274
+ directories_with_json_but_no_jsonld = []
275
+
276
+ for directory in all_directories:
277
+ files_in_dir = list(directory.iterdir())
278
+ jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
279
+ json_files = [f for f in files_in_dir if f.name.endswith(".json") and not f.name.endswith(".jsonld")]
280
+
281
+ if len(jsonld_files) > 0:
282
+ collection_directories.append(directory)
283
+ elif len(json_files) > 0:
284
+ directories_with_json_but_no_jsonld.append(directory)
285
+
286
+ console.print(f"Found {len(collection_directories)} collection directories (with .jsonld files)")
287
+
288
+ # Warn about directories that might be missing context files
289
+ for directory in directories_with_json_but_no_jsonld:
290
+ warnings.append(f"⚠️ Directory '{directory.name}' has .json files but no .jsonld context")
291
+
292
+ # Test each collection directory
293
+ for directory in collection_directories:
294
+ console.print(f"📁 Testing collection: {directory.name}")
295
+ collection_errors = self._test_collection_directory(directory)
296
+ errors.extend(collection_errors)
297
+
298
+ # Add context validation warnings (only if collection passed basic validation)
299
+ if not collection_errors:
300
+ context_warnings = self._validate_context_usage(directory, directory.name)
301
+ for warning in context_warnings:
302
+ console.print(f" {warning}")
303
+
304
+ # Test YAML specification files if they exist
305
+ yaml_specs_errors = self._test_yaml_specs(repo_dir, collection_directories)
306
+ errors.extend(yaml_specs_errors)
307
+
308
+ # Display warnings
309
+ if warnings:
310
+ console.print(f"\n[yellow]Warnings ({len(warnings)}):[/yellow]")
311
+ for warning in warnings:
312
+ console.print(f" {warning}")
313
+
314
+ # Summary
315
+ if errors:
316
+ console.print(f"\n[red]❌ Repository structure validation failed with {len(errors)} errors:[/red]")
317
+ for error in errors:
318
+ console.print(f" {error}")
319
+ return False
320
+ else:
321
+ console.print("\n[green]✅ Repository structure validation passed![/green]")
322
+ console.print(f"✅ Validated {len(collection_directories)} collection directories")
323
+ return True
324
+
325
+ def _test_collection_directory(self, directory: Path) -> List[str]:
326
+ """Test a single collection directory"""
327
+ errors = []
328
+
329
+ files_in_dir = list(directory.iterdir())
330
+ jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
331
+ other_files = [f for f in files_in_dir if not f.name.endswith(".jsonld")]
332
+
333
+ # Test directory structure
334
+ if len(jsonld_files) == 0:
335
+ errors.append(f"❌ {directory.name}: No .jsonld context file found")
336
+ elif len(jsonld_files) > 1:
337
+ console.print(f" [yellow]⚠️ Multiple .jsonld files: {[f.name for f in jsonld_files]}[/yellow]")
338
+
339
+ if len(other_files) == 0:
340
+ errors.append(f"❌ {directory.name}: No element files found")
341
+
342
+ # Test JSONLD context files
343
+ for jsonld_file in jsonld_files:
344
+ try:
345
+ with open(jsonld_file, "r", encoding="utf-8") as f:
346
+ jsonld_content = json.load(f)
347
+
348
+ if "@context" not in jsonld_content:
349
+ errors.append(f"❌ {jsonld_file.name}: Missing '@context' field")
350
+ continue
351
+
352
+ context = jsonld_content["@context"]
353
+ if not isinstance(context, dict):
354
+ errors.append(f"❌ {jsonld_file.name}: '@context' must be a dictionary")
355
+ continue
356
+
357
+ # Check required context fields
358
+ required_fields = ["id", "type", "@base"]
359
+ missing_fields = [field for field in required_fields if field not in context]
360
+ if missing_fields:
361
+ errors.append(f"❌ {jsonld_file.name}: Missing required fields in @context: {missing_fields}")
362
+
363
+ except json.JSONDecodeError as e:
364
+ errors.append(f"❌ {jsonld_file.name}: Invalid JSON syntax - {e}")
365
+ except Exception as e:
366
+ errors.append(f"❌ {jsonld_file.name}: Error reading file - {e}")
367
+
368
+ # Test element files
369
+ json_element_files = [f for f in other_files if f.name.endswith(".json")]
370
+ for element_file in json_element_files:
371
+ try:
372
+ with open(element_file, "r", encoding="utf-8") as f:
373
+ element_content = json.load(f)
374
+
375
+ required_fields = ["id", "type", "@context"]
376
+ missing_fields = [field for field in required_fields if field not in element_content]
377
+ if missing_fields:
378
+ errors.append(f"❌ {element_file.name}: Missing required fields: {missing_fields}")
379
+
380
+ except json.JSONDecodeError as e:
381
+ errors.append(f"❌ {element_file.name}: Invalid JSON syntax - {e}")
382
+ except Exception as e:
383
+ errors.append(f"❌ {element_file.name}: Error reading file - {e}")
384
+
385
+ if not errors:
386
+ console.print(f" [green]✅ Collection '{directory.name}' passed validation[/green]")
387
+
388
+ return errors
389
+
390
+ def _test_yaml_specs(self, repo_dir: Path, collection_directories: List[Path]) -> List[str]:
391
+ """Test YAML specification files (project_specs.yaml, drs_specs.yaml, catalog_spec.yaml, attr_specs.yaml)"""
392
+ errors = []
393
+
394
+ # Import constants and YAML handling
395
+ try:
396
+ import yaml
397
+ from esgvoc.core.constants import (
398
+ PROJECT_SPECS_FILENAME,
399
+ DRS_SPECS_FILENAME,
400
+ CATALOG_SPECS_FILENAME,
401
+ ATTRIBUTES_SPECS_FILENAME
402
+ )
403
+ except ImportError as e:
404
+ errors.append(f"❌ Missing required dependencies: {e}")
405
+ return errors
406
+
407
+ # Get existing collections for validation
408
+ existing_collections = {d.name for d in collection_directories}
409
+ source_collections = set()
410
+ files_tested = 0
411
+
412
+ # Test project_specs.yaml
413
+ project_specs_file = repo_dir / PROJECT_SPECS_FILENAME
414
+ if project_specs_file.exists():
415
+ console.print(f"📄 Testing {PROJECT_SPECS_FILENAME}...")
416
+ try:
417
+ with open(project_specs_file, "r", encoding="utf-8") as f:
418
+ project_specs = yaml.safe_load(f)
419
+ console.print(f" [green]✅ {PROJECT_SPECS_FILENAME} parsed successfully[/green]")
420
+ files_tested += 1
421
+ except yaml.YAMLError as e:
422
+ errors.append(f"❌ {PROJECT_SPECS_FILENAME}: Invalid YAML syntax - {e}")
423
+ except Exception as e:
424
+ errors.append(f"❌ Error reading {PROJECT_SPECS_FILENAME}: {e}")
425
+ else:
426
+ errors.append(f"❌ Required file {PROJECT_SPECS_FILENAME} not found")
427
+
428
+ # Test drs_specs.yaml
429
+ drs_specs_file = repo_dir / DRS_SPECS_FILENAME
430
+ if drs_specs_file.exists():
431
+ console.print(f"📄 Testing {DRS_SPECS_FILENAME}...")
432
+ try:
433
+ with open(drs_specs_file, "r", encoding="utf-8") as f:
434
+ drs_specs = yaml.safe_load(f)
435
+
436
+ # Extract collection references from DRS specs
437
+ for drs_name, drs_spec in drs_specs.items():
438
+ if isinstance(drs_spec, dict) and "parts" in drs_spec:
439
+ for part in drs_spec["parts"]:
440
+ if isinstance(part, dict):
441
+ # Handle both old format (collection_id) and new format (source_collection)
442
+ collection_ref = part.get("collection_id") or part.get("source_collection")
443
+ if collection_ref:
444
+ source_collections.add(collection_ref)
445
+
446
+ console.print(f" [green]✅ {DRS_SPECS_FILENAME} parsed successfully[/green]")
447
+ files_tested += 1
448
+ except yaml.YAMLError as e:
449
+ errors.append(f"❌ {DRS_SPECS_FILENAME}: Invalid YAML syntax - {e}")
450
+ except Exception as e:
451
+ errors.append(f"❌ Error reading {DRS_SPECS_FILENAME}: {e}")
452
+ else:
453
+ errors.append(f"❌ Required file {DRS_SPECS_FILENAME} not found")
454
+
455
+ # Test catalog_spec.yaml (optional)
456
+ catalog_specs_file = repo_dir / CATALOG_SPECS_FILENAME
457
+ if catalog_specs_file.exists():
458
+ console.print(f"📄 Testing {CATALOG_SPECS_FILENAME}...")
459
+ try:
460
+ with open(catalog_specs_file, "r", encoding="utf-8") as f:
461
+ catalog_specs = yaml.safe_load(f)
462
+
463
+ # Extract collection references from catalog specs
464
+ if isinstance(catalog_specs, dict):
465
+ # Check dataset_properties and file_properties
466
+ for prop_type in ["dataset_properties", "file_properties"]:
467
+ if prop_type in catalog_specs and isinstance(catalog_specs[prop_type], list):
468
+ for prop in catalog_specs[prop_type]:
469
+ if isinstance(prop, dict) and "source_collection" in prop:
470
+ source_collections.add(prop["source_collection"])
471
+
472
+ console.print(f" [green]✅ {CATALOG_SPECS_FILENAME} parsed successfully[/green]")
473
+ files_tested += 1
474
+ except yaml.YAMLError as e:
475
+ errors.append(f"❌ {CATALOG_SPECS_FILENAME}: Invalid YAML syntax - {e}")
476
+ except Exception as e:
477
+ errors.append(f"❌ Error reading {CATALOG_SPECS_FILENAME}: {e}")
478
+ else:
479
+ console.print(f" [yellow]⚠️ Optional file {CATALOG_SPECS_FILENAME} not found[/yellow]")
480
+
481
+ # Test attr_specs.yaml (currently not ingested by esgvoc, but test for syntax)
482
+ attr_specs_file = repo_dir / ATTRIBUTES_SPECS_FILENAME
483
+ if attr_specs_file.exists():
484
+ console.print(f"📄 Testing {ATTRIBUTES_SPECS_FILENAME} (syntax only - not ingested by esgvoc)...")
485
+ try:
486
+ with open(attr_specs_file, "r", encoding="utf-8") as f:
487
+ attr_specs = yaml.safe_load(f)
488
+
489
+ # Extract collection references from attribute specs if they exist
490
+ if isinstance(attr_specs, dict):
491
+ # Check for global_attributes_specs or similar structures
492
+ if "specs" in attr_specs:
493
+ specs = attr_specs["specs"]
494
+ if isinstance(specs, dict):
495
+ for attr_name, attr_spec in specs.items():
496
+ if isinstance(attr_spec, dict) and "source_collection" in attr_spec:
497
+ source_collections.add(attr_spec["source_collection"])
498
+
499
+ console.print(f" [green]✅ {ATTRIBUTES_SPECS_FILENAME} parsed successfully[/green]")
500
+ console.print(f" [yellow]⚠️ Note: {ATTRIBUTES_SPECS_FILENAME} is not currently ingested by esgvoc[/yellow]")
501
+ files_tested += 1
502
+ except yaml.YAMLError as e:
503
+ errors.append(f"❌ {ATTRIBUTES_SPECS_FILENAME}: Invalid YAML syntax - {e}")
504
+ except Exception as e:
505
+ errors.append(f"❌ Error reading {ATTRIBUTES_SPECS_FILENAME}: {e}")
506
+ else:
507
+ console.print(f" [yellow]⚠️ Optional file {ATTRIBUTES_SPECS_FILENAME} not found[/yellow]")
508
+
509
+ # Validate collection references
510
+ if source_collections:
511
+ console.print(f" Found {len(source_collections)} source_collection references")
512
+
513
+ for collection in source_collections:
514
+ if collection not in existing_collections:
515
+ errors.append(f"❌ YAML specs reference non-existent collection: '{collection}'")
516
+ else:
517
+ console.print(f" [green]✅ Reference '{collection}' exists[/green]")
518
+ else:
519
+ console.print(" [yellow]⚠️ No collection references found in YAML specs[/yellow]")
520
+
521
+ if files_tested == 0:
522
+ errors.append("❌ No YAML specification files found")
523
+ else:
524
+ console.print(f" [blue]📊 Successfully tested {files_tested} YAML specification files[/blue]")
525
+
526
+ return errors
527
+
528
+ def _debug_missing_term(self, project_name: str, collection_name: str, term_id: str, repo_path: str = "."):
529
+ """
530
+ Provide detailed debugging information for a missing term.
531
+
532
+ Args:
533
+ project_name: Name of the project
534
+ collection_name: Name of the collection
535
+ term_id: ID of the missing term
536
+ repo_path: Path to the repository
537
+ """
538
+ console.print(f"\n[bold yellow]🔍 Debugging missing term: {term_id} in {collection_name}[/bold yellow]")
539
+
540
+ repo_dir = Path(repo_path)
541
+ collection_dir = repo_dir / collection_name
542
+
543
+ # 1. Check if term exists in project repository
544
+ term_file = collection_dir / f"{term_id}.json"
545
+ console.print(f"\n[blue]📁 Project Repository ({project_name}):[/blue]")
546
+
547
+ if term_file.exists():
548
+ try:
549
+ with open(term_file, "r", encoding="utf-8") as f:
550
+ term_content = json.load(f)
551
+ console.print(f" [green]✅ Term found in project: {term_file}[/green]")
552
+ console.print(" [dim]Content:[/dim]")
553
+ formatted_json = json.dumps(term_content, indent=2, ensure_ascii=False)
554
+ for line in formatted_json.split("\n"):
555
+ console.print(f" {line}")
556
+ except Exception as e:
557
+ console.print(f" [red]❌ Error reading term file: {e}[/red]")
558
+ else:
559
+ console.print(f" [red]❌ Term not found in project: {term_file}[/red]")
560
+
561
+ # Try to find the term by searching for files that contain this term_id
562
+ console.print(f" [dim]Searching for files containing term ID '{term_id}'...[/dim]")
563
+ try:
564
+ for json_file in collection_dir.glob("*.json"):
565
+ if json_file.name.endswith(".jsonld"):
566
+ continue
567
+ try:
568
+ with open(json_file, "r", encoding="utf-8") as f:
569
+ content = json.load(f)
570
+ if content.get("id") == term_id:
571
+ console.print(f" [yellow]📄 Found term ID '{term_id}' in file: {json_file.name}[/yellow]")
572
+ console.print(f" [dim]Note: Filename '{json_file.name}' ≠ expected '{term_id}.json'[/dim]")
573
+ console.print(" [dim]Content:[/dim]")
574
+ formatted_json = json.dumps(content, indent=2, ensure_ascii=False)
575
+ for line in formatted_json.split("\n"):
576
+ console.print(f" {line}")
577
+ break
578
+ except Exception:
579
+ continue
580
+ else:
581
+ console.print(f" [dim]No file found containing term ID '{term_id}'[/dim]")
582
+ except Exception as e:
583
+ console.print(f" [dim]Error searching for term: {e}[/dim]")
584
+
585
+ # 2. Check if term exists in universe (using DataMerger to resolve links)
586
+ try:
587
+ current_state = service.get_state()
588
+ if hasattr(current_state, "universe") and current_state.universe.local_path:
589
+ universe_dir = Path(current_state.universe.local_path)
590
+
591
+ console.print(f"\n[blue]🌌 Universe Repository (resolved via DataMerger):[/blue]")
592
+
593
+ # First, try to use DataMerger to resolve the universe term if project term exists
594
+ resolved_universe_term = None
595
+ universe_term_path = None
596
+ project_term_content = None
597
+
598
+ if term_file.exists():
599
+ try:
600
+ # First, read the project term to see what it links to
601
+ with open(term_file, "r", encoding="utf-8") as f:
602
+ project_term_content = json.load(f)
603
+
604
+ from esgvoc.core.data_handler import JsonLdResource
605
+ from esgvoc.core.service.data_merger import DataMerger
606
+
607
+ # Use DataMerger to resolve the universe term like in project_ingestion.py
608
+ locally_avail = {
609
+ "https://espri-mod.github.io/mip-cmor-tables": str(current_state.universe.local_path)
610
+ }
611
+
612
+ console.print(f" [dim]Attempting DataMerger resolution...[/dim]")
613
+
614
+ # Check if project term has an @id link
615
+ if "@id" in project_term_content:
616
+ console.print(f" [dim]Project term @id: {project_term_content['@id']}[/dim]")
617
+
618
+ # Calculate expected universe path
619
+ if "https://espri-mod.github.io/mip-cmor-tables" in project_term_content["@id"]:
620
+ universe_relative_path = project_term_content["@id"].replace(
621
+ "https://espri-mod.github.io/mip-cmor-tables/", ""
622
+ )
623
+ if not universe_relative_path.endswith(".json"):
624
+ universe_relative_path += ".json"
625
+ universe_term_path = universe_dir / universe_relative_path
626
+ console.print(f" [dim]Expected universe path: {universe_term_path}[/dim]")
627
+ else:
628
+ console.print(f" [dim]Project term has no @id link to universe[/dim]")
629
+ # Even without @id, try to infer the universe path from context base
630
+ try:
631
+ # Read the context file to get the base
632
+ context_file = term_file.parent / "000_context.jsonld"
633
+ if context_file.exists():
634
+ with open(context_file, "r", encoding="utf-8") as f:
635
+ context_content = json.load(f)
636
+
637
+ base_url = context_content.get("@context", {}).get("@base", "")
638
+ if base_url and "https://espri-mod.github.io/mip-cmor-tables" in base_url:
639
+ universe_relative_path = (
640
+ base_url.replace("https://espri-mod.github.io/mip-cmor-tables/", "")
641
+ + f"{term_id}.json"
642
+ )
643
+ universe_term_path = universe_dir / universe_relative_path
644
+ console.print(f" [dim]Inferred from context @base: {universe_term_path}[/dim]")
645
+ except Exception as e:
646
+ console.print(f" [dim]Could not infer universe path from context: {e}[/dim]")
647
+
648
+ # Debug: Check what the JsonLdResource expansion produces
649
+ json_resource = JsonLdResource(uri=str(term_file))
650
+ console.print(f" [dim]JSON-LD expanded form: {json_resource.expanded}[/dim]")
651
+
652
+ merger_result = DataMerger(
653
+ data=json_resource,
654
+ locally_available=locally_avail,
655
+ ).merge_linked_json()
656
+
657
+ if merger_result and len(merger_result) > 1:
658
+ # If we have more than one result, the last one is the fully merged term
659
+ resolved_universe_term = merger_result[-1]
660
+
661
+ console.print(f" [green]✅ Term resolved via DataMerger (merged from universe)[/green]")
662
+ if universe_term_path:
663
+ console.print(f" [dim]Resolved universe path: {universe_term_path}[/dim]")
664
+ console.print(
665
+ f" [dim]Universe file exists: {universe_term_path.exists() if universe_term_path else 'N/A'}[/dim]"
666
+ )
667
+ console.print(" [dim]Merged content:[/dim]")
668
+ formatted_json = json.dumps(resolved_universe_term, indent=2, ensure_ascii=False)
669
+ for line in formatted_json.split("\n"):
670
+ console.print(f" {line}")
671
+ else:
672
+ console.print(
673
+ f" [yellow]⚠️ No universe term linked from project term (merge result length: {len(merger_result) if merger_result else 0})[/yellow]"
674
+ )
675
+
676
+ except Exception as e:
677
+ console.print(f" [red]❌ Error using DataMerger to resolve universe term: {e}[/red]")
678
+ # Still show what the project term was trying to link to
679
+ if project_term_content and "@id" in project_term_content:
680
+ console.print(
681
+ f" [dim]Project term was trying to link to: {project_term_content['@id']}[/dim]"
682
+ )
683
+ universe_relative_path = project_term_content["@id"].replace(
684
+ "https://espri-mod.github.io/mip-cmor-tables/", ""
685
+ )
686
+ if not universe_relative_path.endswith(".json"):
687
+ universe_relative_path += ".json"
688
+ universe_term_path = universe_dir / universe_relative_path
689
+ console.print(
690
+ f" [dim]Expected universe file: {universe_term_path} (exists: {universe_term_path.exists() if universe_term_path else False})[/dim]"
691
+ )
692
+
693
+ # Fallback: also check direct universe path and show resolved universe file if it was calculated
694
+ if not resolved_universe_term:
695
+ # Show the resolved path from DataMerger if we have it
696
+ if universe_term_path and universe_term_path.exists():
697
+ try:
698
+ with open(universe_term_path, "r", encoding="utf-8") as f:
699
+ universe_term_content = json.load(f)
700
+ console.print(
701
+ f" [green]✅ Universe file found at resolved path: {universe_term_path}[/green]"
702
+ )
703
+ console.print(" [dim]Content:[/dim]")
704
+ formatted_json = json.dumps(universe_term_content, indent=2, ensure_ascii=False)
705
+ for line in formatted_json.split("\n"):
706
+ console.print(f" {line}")
707
+ except Exception as e:
708
+ console.print(f" [red]❌ Error reading resolved universe file: {e}[/red]")
709
+ else:
710
+ # Show detailed path info - don't try direct collection path since it's wrong
711
+ console.print(f" [red]❌ Term not found in universe:[/red]")
712
+ if universe_term_path:
713
+ console.print(
714
+ f" [dim]• DataMerger resolved path: {universe_term_path} (exists: {universe_term_path.exists()})[/dim]"
715
+ )
716
+
717
+ # Try direct collection-based path as fallback (but note this may be incorrect for project collections vs universe structure)
718
+ universe_collection_dir = universe_dir / collection_name
719
+ universe_term_file = universe_collection_dir / f"{term_id}.json"
720
+ console.print(
721
+ f" [dim]• Direct collection path: {universe_term_file} (exists: {universe_term_file.exists()})[/dim]"
722
+ )
723
+
724
+ # Try to find similar files in the universe to help debugging
725
+ try:
726
+ if universe_term_path:
727
+ parent_dir = universe_term_path.parent
728
+ if parent_dir.exists():
729
+ similar_files = [
730
+ f.name
731
+ for f in parent_dir.iterdir()
732
+ if f.is_file() and f.suffix == ".json" and term_id.lower() in f.name.lower()
733
+ ]
734
+ if similar_files:
735
+ console.print(
736
+ f" [dim]• Similar files in {parent_dir.name}: {similar_files}[/dim]"
737
+ )
738
+
739
+ # Also check if there are files with different casing
740
+ all_files = [
741
+ f.name for f in parent_dir.iterdir() if f.is_file() and f.suffix == ".json"
742
+ ]
743
+ casing_matches = [f for f in all_files if f.lower() == f"{term_id.lower()}.json"]
744
+ if casing_matches and casing_matches[0] != f"{term_id}.json":
745
+ console.print(
746
+ f" [dim]• Case mismatch found: {casing_matches[0]} vs {term_id}.json[/dim]"
747
+ )
748
+ except Exception:
749
+ pass
750
+ else:
751
+ console.print(f" [yellow]⚠️ Universe path not available[/yellow]")
752
+ except Exception as e:
753
+ console.print(f" [red]❌ Error accessing universe: {e}[/red]")
754
+
755
+ # 3. Try to query the term via esgvoc API
756
+ console.print(f"\n[blue]🔗 ESGVoc API Query:[/blue]")
757
+ try:
758
+ import esgvoc.api as ev
759
+
760
+ # Try to get the term from project
761
+ try:
762
+ project_terms = ev.get_all_terms_in_collection(project_name, collection_name)
763
+ matching_terms = [term for term in project_terms if term.id == term_id]
764
+ if matching_terms:
765
+ term = matching_terms[0]
766
+ console.print(f" [green]✅ Term found in esgvoc project API[/green]")
767
+ console.print(f" ID: {term.id}")
768
+ console.print(f" Type: {term.type}")
769
+ console.print(f" Label: {getattr(term, 'label', 'N/A')}")
770
+ console.print(f" Description: {getattr(term, 'description', 'N/A')[:100]}...")
771
+ else:
772
+ console.print(f" [red]❌ Term not found in esgvoc project API[/red]")
773
+ except Exception as e:
774
+ console.print(f" [red]❌ Error querying project API: {e}[/red]")
775
+
776
+ # Try to get the term from universe (if available)
777
+ try:
778
+ universe_terms = ev.get_all_terms_in_collection("universe", collection_name)
779
+ matching_universe_terms = [term for term in universe_terms if term.id == term_id]
780
+ if matching_universe_terms:
781
+ term = matching_universe_terms[0]
782
+ console.print(f" [green]✅ Term found in esgvoc universe API[/green]")
783
+ console.print(f" ID: {term.id}")
784
+ console.print(f" Type: {term.type}")
785
+ console.print(f" Label: {getattr(term, 'label', 'N/A')}")
786
+ console.print(f" Description: {getattr(term, 'description', 'N/A')[:100]}...")
787
+ else:
788
+ console.print(f" [red]❌ Term not found in esgvoc universe API[/red]")
789
+ except Exception as e:
790
+ console.print(f" [red]❌ Error querying universe API: {e}[/red]")
791
+
792
+ except Exception as e:
793
+ console.print(f" [red]❌ Error importing esgvoc API: {e}[/red]")
794
+
795
+ def _validate_context_usage(self, collection_dir: Path, collection_name: str) -> list:
796
+ """
797
+ Validate context usage and detect potential issues.
798
+
799
+ Returns:
800
+ list: List of warning messages
801
+ """
802
+ warnings = []
803
+
804
+ try:
805
+ context_file = collection_dir / "000_context.jsonld"
806
+ if not context_file.exists():
807
+ return warnings
808
+
809
+ # Read context
810
+ with open(context_file, "r", encoding="utf-8") as f:
811
+ context_data = json.load(f)
812
+
813
+ context_mappings = context_data.get("@context", {})
814
+ if not isinstance(context_mappings, dict):
815
+ return warnings
816
+
817
+ # Get all JSON term files
818
+ term_files = [f for f in collection_dir.glob("*.json") if not f.name.endswith(".jsonld")]
819
+
820
+ # Track context key usage
821
+ context_keys_used = set()
822
+ term_properties_used = set()
823
+ terms_using_base_expansion = []
824
+
825
+ for term_file in term_files:
826
+ try:
827
+ with open(term_file, "r", encoding="utf-8") as f:
828
+ term_content = json.load(f)
829
+
830
+ # Check what properties and values are used in the term
831
+ for key, value in term_content.items():
832
+ if key not in ["@context", "@id", "@type"]:
833
+ term_properties_used.add(key)
834
+
835
+ # Check if this property has a shortcut in context
836
+ if key in context_mappings:
837
+ context_keys_used.add(key)
838
+
839
+ # Check if property values use context shortcuts
840
+ # For example: "type": "source" where context has "source": "https://..."
841
+ if isinstance(value, str) and value in context_mappings:
842
+ context_keys_used.add(value)
843
+
844
+ # Check if term relies on @base expansion (has simple id but no explicit @id)
845
+ term_id = term_content.get("id", term_file.stem)
846
+ if "id" in term_content and "@id" not in term_content and "@base" in context_mappings:
847
+ terms_using_base_expansion.append({"file": term_file.name, "id": term_id})
848
+
849
+ except Exception as e:
850
+ continue
851
+
852
+ # Check for unused context keys (excluding standard JSON-LD keys)
853
+ standard_keys = {"@base", "@vocab", "@language", "@version", "id", "type"}
854
+ defined_keys = set(context_mappings.keys()) - standard_keys
855
+ unused_keys = defined_keys - context_keys_used
856
+
857
+ if unused_keys:
858
+ warnings.append(f"⚠️ Context defines unused keys in '{collection_name}': {sorted(unused_keys)}")
859
+
860
+ # Check for properties without shortcuts
861
+ properties_without_shortcuts = term_properties_used - context_keys_used - {"id", "type"}
862
+ if properties_without_shortcuts:
863
+ warnings.append(
864
+ f"⚠️ Properties used without context shortcuts in '{collection_name}': {sorted(properties_without_shortcuts)}"
865
+ )
866
+
867
+ # Check for filename/ID mismatches
868
+ filename_id_mismatches = []
869
+ for term_file in term_files:
870
+ try:
871
+ with open(term_file, "r", encoding="utf-8") as f:
872
+ term_content = json.load(f)
873
+
874
+ expected_id = term_file.stem # filename without .json extension
875
+ actual_id = term_content.get("id")
876
+
877
+ if actual_id and actual_id != expected_id:
878
+ filename_id_mismatches.append(
879
+ {"file": term_file.name, "expected_id": expected_id, "actual_id": actual_id}
880
+ )
881
+ except Exception:
882
+ continue
883
+
884
+ if filename_id_mismatches:
885
+ warnings.append(f"⚠️ Filename/ID mismatches in '{collection_name}':")
886
+ for mismatch in filename_id_mismatches[:5]: # Show first 5
887
+ warnings.append(
888
+ f" • {mismatch['file']}: id='{mismatch['actual_id']}' (expected '{mismatch['expected_id']}')"
889
+ )
890
+ if len(filename_id_mismatches) > 5:
891
+ warnings.append(f" • ... and {len(filename_id_mismatches) - 5} more mismatches")
892
+
893
+ # Base expansion is normal JSON-LD behavior - only report if there might be issues
894
+ # For now, we'll skip this since @base expansion is the expected pattern
895
+
896
+ # Only warn about @base vs shortcuts if they're used for the same purpose
897
+ # @base is for term identity URLs, shortcuts are for property/type values - this is normal
898
+ # We could add more sophisticated conflict detection here if needed
899
+
900
+ except Exception as e:
901
+ warnings.append(f"⚠️ Error validating context usage in '{collection_name}': {e}")
902
+
903
+ return warnings
904
+
905
+ def _validate_universe_warnings(self) -> bool:
906
+ """
907
+ Validate universe repository for potential issues and display warnings.
908
+
909
+ Returns:
910
+ bool: True if universe validation completed (warnings don't fail the test)
911
+ """
912
+ try:
913
+ current_state = service.get_state()
914
+ if not hasattr(current_state, "universe") or not current_state.universe.local_path:
915
+ console.print(f"[dim]⚠️ Universe path not available for validation[/dim]")
916
+ return True
917
+
918
+ universe_dir = Path(current_state.universe.local_path)
919
+ if not universe_dir.exists():
920
+ console.print(f"[dim]⚠️ Universe directory not found: {universe_dir}[/dim]")
921
+ return True
922
+
923
+ console.print(f"[blue]🌌 Validating Universe Repository: {universe_dir.name}[/blue]")
924
+
925
+ # Find universe collections (directories with JSON files)
926
+ universe_collections = []
927
+ for item in universe_dir.iterdir():
928
+ if item.is_dir():
929
+ json_files = list(item.glob("*.json"))
930
+ jsonld_files = [f for f in json_files if f.name.endswith(".jsonld")]
931
+ regular_json_files = [f for f in json_files if not f.name.endswith(".jsonld")]
932
+
933
+ if regular_json_files:
934
+ universe_collections.append(item)
935
+
936
+ console.print(f"Found {len(universe_collections)} universe collections to validate")
937
+
938
+ total_warnings = 0
939
+ for collection_dir in universe_collections:
940
+ warnings = self._validate_context_usage(collection_dir, collection_dir.name)
941
+ if warnings:
942
+ console.print(f"📁 Universe collection '{collection_dir.name}':")
943
+ for warning in warnings:
944
+ console.print(f" {warning}")
945
+ total_warnings += 1
946
+
947
+ if total_warnings == 0:
948
+ console.print("✅ No validation warnings found in universe")
949
+ else:
950
+ console.print(f"⚠️ Found {total_warnings} validation warnings in universe")
951
+
952
+ console.print("") # Add spacing before project validation
953
+ return True
954
+
955
+ except Exception as e:
956
+ console.print(f"[red]❌ Error validating universe: {e}[/red]")
957
+ return True # Don't fail the test for universe validation errors
958
+
959
+ def test_esgvoc_api_access(self, project_name: str, repo_path: str = ".") -> bool:
960
+ """
961
+ Test that all repository collections and elements are queryable via esgvoc API
962
+
963
+ Args:
964
+ project_name: Name of the project being tested
965
+ repo_path: Path to the repository (default: current directory)
966
+
967
+ Returns:
968
+ bool: True if all API tests pass
969
+ """
970
+ console.print(f"[blue]🔍 Testing esgvoc API access for project: {project_name}[/blue]")
971
+
972
+ try:
973
+ import esgvoc.api as ev
974
+ except ImportError as e:
975
+ console.print(f"[red]❌ Cannot import esgvoc.api: {e}[/red]")
976
+ return False
977
+
978
+ repo_dir = Path(repo_path)
979
+ errors = []
980
+
981
+ # Test 1: Verify project exists in esgvoc
982
+ try:
983
+ projects = ev.get_all_projects()
984
+ if project_name not in projects:
985
+ errors.append(f"❌ Project '{project_name}' not found in esgvoc. Available: {projects}")
986
+ return False
987
+ console.print(f"[green]✅ Project '{project_name}' found in esgvoc[/green]")
988
+ except Exception as e:
989
+ errors.append(f"❌ Failed to get projects from esgvoc: {e}")
990
+ return False
991
+
992
+ # Get repository collections
993
+ repo_collections = []
994
+ all_directories = [p for p in repo_dir.iterdir() if p.is_dir()]
995
+ for directory in all_directories:
996
+ files_in_dir = list(directory.iterdir())
997
+ jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
998
+ if len(jsonld_files) > 0:
999
+ repo_collections.append(directory.name)
1000
+
1001
+ # Test 2: Get collections from esgvoc
1002
+ try:
1003
+ # Debug: Check active configuration during API test
1004
+ current_active = service.get_config_manager().get_active_config_name()
1005
+ console.print(f"[dim]Debug: Active config during API test: {current_active}[/dim]")
1006
+
1007
+ esgvoc_collections = ev.get_all_collections_in_project(project_name)
1008
+ console.print(
1009
+ f"Found {len(esgvoc_collections)} collections in esgvoc, {len(repo_collections)} in repository"
1010
+ )
1011
+ except ValidationError as e:
1012
+ # Enhanced error reporting for Pydantic validation errors
1013
+ error_msg = f"❌ Validation error while processing collections for project '{project_name}'"
1014
+
1015
+ # Try to extract more context from the error
1016
+ if hasattr(e, "errors") and e.errors():
1017
+ for error in e.errors():
1018
+ if "input" in error and "ctx" in error:
1019
+ error_msg += f"\n • Invalid value: '{error['input']}'"
1020
+ if "enum_values" in error["ctx"]:
1021
+ error_msg += f"\n • Expected one of: {error['ctx']['enum_values']}"
1022
+ if error.get("type") == "enum":
1023
+ error_msg += f"\n • Field: {error.get('loc', 'unknown')}"
1024
+
1025
+ errors.append(error_msg)
1026
+ console.print(f"[red]{error_msg}[/red]")
1027
+ console.print(f"[dim]Full error details: {str(e)}[/dim]")
1028
+ return False
1029
+ except ValueError as e:
1030
+ # Enhanced error reporting for database validation issues
1031
+ error_str = str(e)
1032
+ if "collections with empty term_kind" in error_str:
1033
+ console.print(f"[red]❌ Database validation error for project '{project_name}':[/red]")
1034
+ console.print(f"[red]{error_str}[/red]")
1035
+ errors.append(f"❌ Invalid termkind values in database for project '{project_name}'")
1036
+ else:
1037
+ errors.append(f"❌ Failed to get collections from esgvoc: {e}")
1038
+ console.print(f"[red]API Error Details: {e}[/red]")
1039
+ return False
1040
+ except Exception as e:
1041
+ errors.append(f"❌ Failed to get collections from esgvoc: {e}")
1042
+ console.print(f"[red]API Error Details: {e}[/red]")
1043
+ return False
1044
+
1045
+ # Test 3: Verify each repository collection is queryable
1046
+ missing_in_esgvoc = []
1047
+ for collection_name in repo_collections:
1048
+ if collection_name not in esgvoc_collections:
1049
+ missing_in_esgvoc.append(collection_name)
1050
+ else:
1051
+ console.print(f" [green]✅ Collection '{collection_name}' found in esgvoc[/green]")
1052
+
1053
+ if missing_in_esgvoc:
1054
+ errors.append(f"❌ Collections in repository but not in esgvoc: {missing_in_esgvoc}")
1055
+
1056
+ # Test 4: Test elements in each collection
1057
+ for collection_name in repo_collections:
1058
+ if collection_name in esgvoc_collections:
1059
+ console.print(f"📂 Testing elements in collection: {collection_name}")
1060
+
1061
+ # Get repository elements
1062
+ collection_dir = repo_dir / collection_name
1063
+ json_files = [
1064
+ f for f in collection_dir.iterdir() if f.name.endswith(".json") and not f.name.endswith(".jsonld")
1065
+ ]
1066
+
1067
+ repo_elements = []
1068
+ repo_element_sources = {} # Track where each ID comes from
1069
+ for json_file in json_files:
1070
+ try:
1071
+ with open(json_file, "r", encoding="utf-8") as f:
1072
+ content = json.load(f)
1073
+ element_id = content.get("id", json_file.stem)
1074
+ repo_elements.append(element_id)
1075
+ repo_element_sources[element_id] = {"file": json_file.name, "from_id_field": "id" in content}
1076
+ except:
1077
+ element_id = json_file.stem
1078
+ repo_elements.append(element_id)
1079
+ repo_element_sources[element_id] = {"file": json_file.name, "from_id_field": False}
1080
+
1081
+ # Get esgvoc elements
1082
+ try:
1083
+ esgvoc_terms = ev.get_all_terms_in_collection(project_name, collection_name)
1084
+ esgvoc_element_ids = [term.id for term in esgvoc_terms]
1085
+
1086
+ console.print(f" Repository: {len(repo_elements)}, ESGVoc: {len(esgvoc_element_ids)} elements")
1087
+
1088
+ missing_elements = [elem for elem in repo_elements if elem not in esgvoc_element_ids]
1089
+ if missing_elements:
1090
+ errors.append(
1091
+ f"❌ Collection '{collection_name}': Elements missing from esgvoc: {missing_elements}"
1092
+ )
1093
+
1094
+ # Debug missing elements source tracking
1095
+ if self.debug_missing_terms:
1096
+ console.print(f" [dim]Missing elements and their sources:[/dim]")
1097
+ for elem in missing_elements:
1098
+ source_info = repo_element_sources.get(
1099
+ elem, {"file": "unknown", "from_id_field": False}
1100
+ )
1101
+ id_source = "id field" if source_info["from_id_field"] else "filename"
1102
+ console.print(f" [dim] • {elem} (from {source_info['file']} {id_source})[/dim]")
1103
+
1104
+ # Detailed debugging for each missing element (if enabled)
1105
+ if self.debug_missing_terms:
1106
+ console.print(
1107
+ f"\n[bold red]📋 Detailed analysis of missing elements in '{collection_name}':[/bold red]"
1108
+ )
1109
+ for missing_element in missing_elements:
1110
+ self._debug_missing_term(project_name, collection_name, missing_element, repo_path)
1111
+ else:
1112
+ console.print(f"[dim]💡 Use --debug-terms for detailed analysis of missing elements[/dim]")
1113
+ else:
1114
+ console.print(f" [green]✅ All elements in '{collection_name}' are queryable[/green]")
1115
+
1116
+ except Exception as e:
1117
+ errors.append(f"❌ Failed to get terms from collection '{collection_name}': {e}")
1118
+
1119
+ # Test 5: General API functions
1120
+ try:
1121
+ all_terms = ev.get_all_terms_in_all_projects()
1122
+ console.print(f"[blue]📊 ESGVoc API returned {len(all_terms)} total terms across all projects[/blue]")
1123
+ except Exception as e:
1124
+ errors.append(f"❌ Failed to get all terms from esgvoc: {e}")
1125
+
1126
+ # Summary
1127
+ if errors:
1128
+ console.print(f"\n[red]❌ ESGVoc API validation failed with {len(errors)} errors:[/red]")
1129
+ for error in errors:
1130
+ console.print(f" {error}")
1131
+ return False
1132
+ else:
1133
+ console.print("\n[green]✅ ESGVoc API validation passed![/green]")
1134
+ console.print(f"✅ Validated {len(repo_collections)} collections")
1135
+ console.print("✅ All repository elements accessible through esgvoc API")
1136
+ return True
1137
+
1138
    def run_complete_test(
        self,
        project_name: str,
        repo_url: str = None,
        branch: str = None,
        repo_path: str = None,
        esgvoc_branch: str = None,
        universe_branch: str = None,
    ) -> bool:
        """
        Run the complete CV testing pipeline.

        Pipeline: configure esgvoc -> synchronize CVs -> validate universe warnings
        -> resolve the CV repository path -> test repository structure -> test
        esgvoc API access. Failures accumulate; only a configuration failure
        aborts early.

        Args:
            project_name: Name of the project to test
            repo_url: Custom repository URL (optional)
            branch: Custom branch (optional)
            repo_path: Path to repository for structure testing (optional - auto-detected if not provided)
            esgvoc_branch: ESGVoc library branch (for info only)
            universe_branch: Custom universe branch (optional)

        Returns:
            bool: True if all tests pass
        """
        console.print(f"[bold blue]🚀 Starting complete CV test for project: {project_name}[/bold blue]")

        success = True

        # Step 1: Configure esgvoc (a failure here aborts the whole run).
        if not self.configure_for_testing(project_name, repo_url, branch, esgvoc_branch, universe_branch):
            return False

        # Step 2: Synchronize CVs (failure is recorded but the run continues).
        if not self.synchronize_cvs():
            success = False

        # Step 2.5: Validate universe for warnings (informational, never fails).
        self._validate_universe_warnings()

        # Step 3: Determine repository path AFTER synchronization - use the
        # downloaded CV repository if not provided by the caller.
        if repo_path is None:
            # Preferred source: the state service knows the project's local path.
            try:
                current_state = service.get_state()
                if hasattr(current_state, "projects") and project_name in current_state.projects:
                    project_state = current_state.projects[project_name]
                    if hasattr(project_state, "local_path") and project_state.local_path:
                        repo_path = str(project_state.local_path)
                        console.print(f"[blue]Using CV repository from state service: {repo_path}[/blue]")
                    else:
                        console.print("[dim]Debug: Project state has no local_path[/dim]")
                else:
                    console.print(f"[dim]Debug: Project {project_name} not found in state service projects[/dim]")
                    console.print(
                        f"[dim]Debug: Available projects in state: {list(current_state.projects.keys()) if hasattr(current_state, 'projects') else 'No projects'}[/dim]"
                    )
            except Exception as e:
                console.print(f"[dim]Debug: Error accessing state service: {e}[/dim]")

            # Fallback: probe the known default local path under the various
            # configuration data directories to find the repository on disk.
            if repo_path is None:
                try:
                    from esgvoc.core.service.configuration.setting import ServiceSettings

                    if project_name in ServiceSettings.DEFAULT_PROJECT_CONFIGS:
                        default_local_path = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name]["local_path"]
                        config_manager = service.get_config_manager()

                        # Try different path constructions to find where the repository actually is
                        possible_paths = [
                            config_manager.data_config_dir / default_local_path,
                            config_manager.data_dir / self.test_config_name / default_local_path,
                            config_manager.data_dir / default_local_path,
                        ]

                        # Also check in other configuration directories
                        if config_manager.data_dir.exists():
                            for config_dir in config_manager.data_dir.iterdir():
                                if config_dir.is_dir():
                                    possible_repo_path = config_dir / default_local_path
                                    if possible_repo_path.exists():
                                        possible_paths.append(possible_repo_path)

                        # First existing candidate wins.
                        for path in possible_paths:
                            if path and path.exists():
                                repo_path = str(path)
                                console.print(f"[blue]Found CV repository at: {repo_path}[/blue]")
                                break
                except Exception as e:
                    console.print(f"[dim]Debug: Error in fallback path detection: {e}[/dim]")

            # Final fallback: current directory.
            if repo_path is None:
                repo_path = "."
                console.print("[yellow]⚠️ Could not determine CV repository path, using current directory[/yellow]")

        # Step 4: Test repository structure (comment was mislabelled "Step 3").
        if not self.test_repository_structure(repo_path):
            success = False

        # Debug: Check what configuration is active before the API test.
        current_active = service.get_config_manager().get_active_config_name()
        console.print(f"[dim]Debug: Active config before API test: {current_active}[/dim]")

        # Step 5: Test esgvoc API access.
        if not self.test_esgvoc_api_access(project_name, repo_path):
            success = False

        # Summary
        if success:
            console.print(f"\n[bold green]🎉 All tests passed for project '{project_name}'![/bold green]")
        else:
            console.print(f"\n[bold red]❌ Some tests failed for project '{project_name}'[/bold red]")

        return success
1252
+
1253
+ def restore_original_configuration(self):
1254
+ """Restore the original esgvoc configuration"""
1255
+ try:
1256
+ if self.config_manager and self.original_config_name:
1257
+ # Switch back to original configuration
1258
+ console.print(f"[blue]Restoring original configuration: {self.original_config_name}[/blue]")
1259
+ self.config_manager.switch_config(self.original_config_name)
1260
+
1261
+ # CRITICAL: Restore the original data_config_dir
1262
+ self.config_manager.data_config_dir = self.config_manager.data_dir / self.original_config_name
1263
+ self.config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
1264
+ console.print(f"[dim]Debug: Restored data_config_dir to: {self.config_manager.data_config_dir}[/dim]")
1265
+
1266
+ # Reset service state
1267
+ service.current_state = service.get_state()
1268
+
1269
+ # Remove temporary test configuration
1270
+ configs = self.config_manager.list_configs()
1271
+ if self.test_config_name in configs:
1272
+ console.print(f"[blue]Removing temporary test configuration: {self.test_config_name}[/blue]")
1273
+ self.config_manager.remove_config(self.test_config_name)
1274
+
1275
+ console.print(f"[green]✅ Restored original configuration: {self.original_config_name}[/green]")
1276
+ except Exception as e:
1277
+ console.print(f"[yellow]⚠️ Error restoring original configuration: {e}[/yellow]")
1278
+
1279
+ def cleanup(self):
1280
+ """Cleanup resources and restore original configuration"""
1281
+ self.restore_original_configuration()
1282
+
1283
+
1284
def main():
    """Command-line entry point for the CV tester.

    Commands:
        list                 - List available projects
        configure <project>  - Configure esgvoc for testing
        test <project>       - Run complete test suite
        structure <path>     - Test repository structure only
        api <project> <path> - Test esgvoc API access only

    Environment variables: TEST_BRANCH, REPO_URL, UNIVERSE_BRANCH,
    ESGVOC_LIBRARY_BRANCH. Exits with status 0 on success, 1 on failure.
    """
    if len(sys.argv) < 2:
        print("Usage: cv_tester.py <command> [options]")
        print("\nCommands:")
        print(" list - List available projects")
        print(" configure <project> - Configure esgvoc for testing")
        print(" test <project> - Run complete test suite")
        print(" structure <path> - Test repository structure only")
        print(" api <project> <path> - Test esgvoc API access only")
        print("\nEnvironment variables:")
        print(" TEST_BRANCH - Custom project branch to test")
        print(" REPO_URL - Custom repository URL")
        print(" UNIVERSE_BRANCH - Custom universe branch to test")
        print(" ESGVOC_LIBRARY_BRANCH - ESGVoc library branch (for info)")
        sys.exit(1)

    command = sys.argv[1]
    tester = CVTester()

    try:
        if command == "list":
            projects = tester.get_available_projects()
            console.print(f"[blue]Available projects ({len(projects)}):[/blue]")
            for project in projects:
                config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project]
                console.print(f" [cyan]{project}[/cyan] - {config['github_repo']} (branch: {config['branch']})")

        elif command == "configure":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_url = os.environ.get("REPO_URL")
            branch = os.environ.get("TEST_BRANCH")
            esgvoc_branch = os.environ.get("ESGVOC_LIBRARY_BRANCH")
            # BUG FIX: UNIVERSE_BRANCH was documented in the usage text but was
            # never read by any command handler; honour it here.
            universe_branch = os.environ.get("UNIVERSE_BRANCH")

            if tester.configure_for_testing(project_name, repo_url, branch, esgvoc_branch, universe_branch):
                if tester.synchronize_cvs():
                    console.print("[green]✅ Configuration complete[/green]")
                else:
                    sys.exit(1)
            else:
                sys.exit(1)

        elif command == "test":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_url = os.environ.get("REPO_URL")
            branch = os.environ.get("TEST_BRANCH")
            repo_path = sys.argv[3] if len(sys.argv) > 3 else "."
            esgvoc_branch = os.environ.get("ESGVOC_LIBRARY_BRANCH")
            # BUG FIX: pass UNIVERSE_BRANCH through to the full test pipeline
            # (previously documented but ignored).
            universe_branch = os.environ.get("UNIVERSE_BRANCH")

            success = tester.run_complete_test(
                project_name, repo_url, branch, repo_path, esgvoc_branch, universe_branch
            )
            sys.exit(0 if success else 1)

        elif command == "structure":
            repo_path = sys.argv[2] if len(sys.argv) > 2 else "."
            success = tester.test_repository_structure(repo_path)
            sys.exit(0 if success else 1)

        elif command == "api":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_path = sys.argv[3] if len(sys.argv) > 3 else "."
            success = tester.test_esgvoc_api_access(project_name, repo_path)
            sys.exit(0 if success else 1)

        else:
            console.print(f"[red]Error: Unknown command '{command}'[/red]")
            sys.exit(1)

    finally:
        # Always restore the original esgvoc configuration, even on sys.exit.
        tester.cleanup()
1365
+
1366
+
1367
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()