esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,1611 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CV Testing Application for ESGVoc
4
+
5
+ This application allows testing of project CVs and Universe CVs with support for:
6
+ - Custom repository URLs and branches via CLI options and environment variables
7
+ - Universe branch override for testing against different WCRP-universe versions
8
+ - Validation of repository structure and content
9
+ - Testing YAML specification files (project_specs.yaml, drs_specs.yaml, catalog_spec.yaml, attr_specs.yaml)
10
+ - Testing esgvoc API integration with CV repositories
11
+ - Support for all available default projects: cmip6, cmip6plus, input4mip, obs4mip, cordex-cmip6
12
+ - Rich CLI interface integrated with esgvoc CLI
13
+ - Environment variable support for CI/CD integration
14
+ - Automatic repository path detection for synchronized CVs
15
+ """
16
+
17
+ import json
18
+ import os
19
+ import sys
20
+ from pathlib import Path
21
+ from typing import List
22
+
23
+ from pydantic import ValidationError
24
+ from rich.console import Console
25
+
26
+ import esgvoc.core.service as service
27
+ from esgvoc.core.service.configuration.setting import (
28
+ ServiceSettings,
29
+ )
30
+ from esgvoc.core.service.state import StateService
31
+
32
+ console = Console()
33
+
34
+
35
def detect_project_name() -> str:
    """
    Try to auto-detect the project name from the environment or directory layout.

    Detection order:
      1. The ``PROJECT_NAME`` environment variable (lower-cased).
      2. The current working directory name matched against known patterns.
      3. Each parent directory name, nearest first.
      4. A warned fallback of ``"obs4mip"``.

    Returns:
        str: The detected project name, always lower-cased.
    """
    # Environment variable takes precedence (useful for CI/CD integration).
    env_project = os.environ.get("PROJECT_NAME")
    if env_project:
        return env_project.lower()

    # Known substring patterns for each project's directory name.
    project_patterns = {
        "obs4mips": ["obs4mips", "obs4mip"],
        "input4mips": ["input4mips", "input4mip"],
        "cmip6": ["cmip6"],
        "cmip6plus": ["cmip6plus", "cmip6+"],
        "cordex-cmip6": ["cordex-cmip6", "cordex", "cordexcmip6"],
    }

    # BUGFIX: match longest patterns first. Testing in plain dict order let the
    # short "cmip6" pattern shadow the more specific "cmip6plus" and
    # "cordex-cmip6" names (a directory called "cmip6plus" was previously
    # misclassified as project "cmip6").
    ordered_patterns = sorted(
        ((pattern, project) for project, patterns in project_patterns.items() for pattern in patterns),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )

    def _match(name):
        # Return the project whose most specific pattern occurs in *name*,
        # or None when nothing matches.
        for pattern, project in ordered_patterns:
            if pattern in name:
                return project
        return None

    # Check the current directory name, then each parent (nearest first).
    cwd = Path.cwd()
    for candidate in [cwd.name.lower()] + [parent.name.lower() for parent in cwd.parents]:
        detected = _match(candidate)
        if detected:
            return detected

    # Default fallback
    console.print("[yellow]⚠️ Could not auto-detect project, using 'obs4mip' as default[/yellow]")
    return "obs4mip"
72
+
73
+
74
+ class CVTester:
75
+ """Main CV testing class"""
76
+
77
def __init__(self, debug_missing_terms: bool = True):
    """Initialize the tester with an empty configuration state.

    Args:
        debug_missing_terms: When True, emit extra diagnostics about
            terms that cannot be resolved during testing.
    """
    # Name of the temporary configuration created for each test run.
    self.test_config_name = "test_cv_temp"
    # Populated later by configure_for_testing(); original name is kept so
    # the previously active configuration can be restored afterwards.
    self.original_config_name = None
    self.config_manager = None
    self.debug_missing_terms = debug_missing_terms
82
+
83
def get_available_projects(self) -> List[str]:
    """Return the names of every project CV shipped with esgvoc defaults."""
    # Iterating the default-config mapping yields its keys (project names).
    return list(ServiceSettings._get_default_project_configs())
86
+
87
def configure_for_testing(
    self,
    project_name: str = None,
    repo_url: str = None,
    branch: str = None,
    esgvoc_branch: str = None,
    universe_branch: str = None,
) -> bool:
    """
    Configure esgvoc with custom or default CV settings for testing.

    Builds a temporary configuration named ``self.test_config_name`` that
    contains the WCRP universe plus the single project under test, removes
    any stale test configuration/data, switches esgvoc to it, and replaces
    the global ``service.current_state`` with a fresh StateService.

    Args:
        project_name: Name of the project to test (required)
        repo_url: Custom repository URL (optional - uses default if not provided)
        branch: Custom branch (optional - uses default if not provided)
        esgvoc_branch: ESGVoc library branch (for info only)
        universe_branch: Custom universe branch (optional - uses 'esgvoc' if not provided)

    Returns:
        bool: True if configuration was successful
    """
    try:
        # Get config manager and store original active configuration so it
        # can be restored when testing is finished.
        self.config_manager = service.get_config_manager()
        self.original_config_name = self.config_manager.get_active_config_name()

        console.print(f"[blue]Current active configuration: {self.original_config_name}[/blue]")

        # Determine project configuration; reject unknown project names early.
        if project_name not in self.get_available_projects():
            available = ", ".join(self.get_available_projects())
            console.print(f"[red]❌ Unknown project '{project_name}'. Available projects: {available}[/red]")
            return False

        # Use custom repo/branch if provided, otherwise use defaults.
        if repo_url or branch:
            # Custom configuration: start from the project's defaults and
            # override only what the caller supplied.
            default_config = ServiceSettings._get_default_project_configs()[project_name]
            project_config = {
                "project_name": project_name,
                "github_repo": repo_url or default_config["github_repo"],
                "branch": branch or default_config["branch"],
                "local_path": default_config["local_path"],
                "db_path": default_config["db_path"],
            }
            console.print(f"[blue]Using custom configuration for {project_name}:[/blue]")
            console.print(f" Repository: {project_config['github_repo']}")
            console.print(f" Branch: {project_config['branch']}")
        else:
            # Default configuration (copy so later mutation cannot leak back
            # into the shared defaults dict).
            project_config = ServiceSettings._get_default_project_configs()[project_name].copy()
            console.print(f"[blue]Using default configuration for {project_name}[/blue]")

        # Create temporary test configuration with universe and single project.
        test_config_data = {
            "universe": {
                "github_repo": "https://github.com/WCRP-CMIP/WCRP-universe",
                "branch": universe_branch or "esgvoc",
                "local_path": "repos/WCRP-universe",
                "db_path": "dbs/universe.sqlite",
            },
            "projects": [project_config],
        }

        # Clean up old test_cv_temp data directories (repos and dbs) to ensure fresh start.
        import shutil
        test_data_dir = self.config_manager.data_dir / self.test_config_name
        if test_data_dir.exists():
            console.print(f"[yellow]Cleaning up old test data directories...[/yellow]")
            try:
                shutil.rmtree(test_data_dir)
                console.print(f"[green] ✓ Removed: {test_data_dir}[/green]")
            except Exception as e:
                # Best-effort cleanup: a stale directory is only a warning.
                console.print(f"[yellow] Warning: Failed to clean test data directories: {e}[/yellow]")

        # Remove existing test config if it exists.
        configs = self.config_manager.list_configs()
        if self.test_config_name in configs:
            console.print(f"[yellow]Removing existing test configuration: {self.test_config_name}[/yellow]")
            self.config_manager.remove_config(self.test_config_name)

        # Create new test configuration.
        console.print(f"[blue]Creating temporary test configuration: {self.test_config_name}[/blue]")
        console.print(f"[dim]Debug: Test config data projects: {test_config_data['projects']}[/dim]")
        self.config_manager.add_config(self.test_config_name, test_config_data)

        # Switch to test configuration.
        self.config_manager.switch_config(self.test_config_name)
        console.print(f"[green]✅ Switched to test configuration: {self.test_config_name}[/green]")

        # CRITICAL FIX: Update the data_config_dir after switching configurations.
        # This is the root cause - data_config_dir is set once and never updated
        # by switch_config(), so it would otherwise still point at the previous
        # configuration's data directory.
        self.config_manager.data_config_dir = self.config_manager.data_dir / self.test_config_name
        self.config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
        console.print(f"[dim]Debug: Updated data_config_dir to: {self.config_manager.data_config_dir}[/dim]")

        # Clear any potential caches in the config manager (attributes are
        # probed with hasattr since not all manager versions define them).
        if hasattr(self.config_manager, "_cached_config"):
            self.config_manager._cached_config = None
        if hasattr(self.config_manager, "cache"):
            self.config_manager.cache.clear()

        # Create fresh StateService with the updated configuration and
        # directory, replacing the module-global state.
        fresh_config = self.config_manager.get_config(self.test_config_name)
        service.current_state = service.StateService(fresh_config)
        console.print(f"[dim]Debug: Created fresh StateService for {self.test_config_name}[/dim]")

        # Debug: Verify the fix worked.
        console.print(
            f"[dim]Debug: StateService universe base_dir: {service.current_state.universe.base_dir}[/dim]"
        )
        console.print(
            f"[dim]Debug: StateService universe local_path: {service.current_state.universe.local_path}[/dim]"
        )

        if esgvoc_branch:
            # Informational only - the running esgvoc library is not switched.
            console.print(f"[dim]Using esgvoc library from branch: {esgvoc_branch}[/dim]")

        return True

    except Exception as e:
        # Broad catch is deliberate here: any failure during configuration is
        # reported with a traceback and converted to a False return value.
        console.print(f"[red]❌ Configuration failed: {e}[/red]")
        import traceback

        console.print(traceback.format_exc())
        return False
213
+
214
def synchronize_cvs(self) -> bool:
    """Synchronize/download the configured CVs.

    Re-validates that the active configuration is the temporary test
    configuration (forcing a switch and rebuilding the StateService if not)
    before running a full synchronization.

    Returns:
        bool: True if synchronization completed without error.
    """
    try:
        console.print("[blue]Synchronizing CVs...[/blue]")

        # Force refresh the state service to ensure it uses the correct configuration.
        service.current_state = service.get_state()

        # Debug: Show what configuration the state service is using.
        config_manager = service.get_config_manager()
        active_config = config_manager.get_active_config_name()
        console.print(f"[dim]Debug: Active config during sync: {active_config}[/dim]")
        console.print(f"[dim]Debug: Expected config: {self.test_config_name}[/dim]")
        console.print(f"[dim]Debug: Data config dir during sync: {config_manager.data_config_dir}[/dim]")

        if active_config != self.test_config_name:
            # Defensive repair path: something reset the active configuration
            # since configure_for_testing() ran, so redo the switch and the
            # related bookkeeping here.
            console.print(
                f"[yellow]⚠️ Warning: Active config mismatch, forcing switch to {self.test_config_name}[/yellow]"
            )
            config_manager.switch_config(self.test_config_name)

            # Update data_config_dir after forced switch (switch_config does
            # not refresh it - same root cause as in configure_for_testing).
            config_manager.data_config_dir = config_manager.data_dir / self.test_config_name
            config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)

            # Clear caches again after forced switch.
            if hasattr(config_manager, "_cached_config"):
                config_manager._cached_config = None
            if hasattr(config_manager, "cache"):
                config_manager.cache.clear()

            # Create fresh StateService with correct configuration.
            fresh_config = config_manager.get_config(self.test_config_name)
            service.current_state = StateService(fresh_config)
            console.print(f"[dim]Debug: Recreated StateService for {self.test_config_name}[/dim]")

        # Download/refresh universe and project CVs for the active state.
        service.current_state.synchronize_all()
        console.print("[green]✅ CVs synchronized successfully[/green]")
        return True
    except Exception as e:
        # Broad catch is deliberate: report with traceback, signal via False.
        console.print(f"[red]❌ CV synchronization failed: {e}[/red]")
        import traceback

        console.print(traceback.format_exc())
        return False
259
+
260
def test_repository_structure(self, repo_path: str = ".") -> bool:
    """
    Validate the on-disk layout and file requirements of a CV repository.

    Args:
        repo_path: Path to the repository to test (default: current directory)

    Returns:
        bool: True if all tests pass
    """
    console.print(f"[blue]🧪 Testing repository structure in: {repo_path}[/blue]")

    repo_dir = Path(repo_path)
    if not repo_dir.exists():
        console.print(f"[red]❌ Repository path does not exist: {repo_path}[/red]")
        return False

    errors: List[str] = []
    warnings: List[str] = []

    # Partition sub-directories: a directory counts as a "collection" when it
    # ships at least one .jsonld context file; directories holding only plain
    # .json files are flagged as probably missing their context.
    collection_directories: List[Path] = []
    directories_with_json_but_no_jsonld: List[Path] = []
    for subdir in (p for p in repo_dir.iterdir() if p.is_dir()):
        entries = list(subdir.iterdir())
        has_jsonld = any(f.name.endswith(".jsonld") for f in entries)
        has_plain_json = any(
            f.name.endswith(".json") and not f.name.endswith(".jsonld") for f in entries
        )
        if has_jsonld:
            collection_directories.append(subdir)
        elif has_plain_json:
            directories_with_json_but_no_jsonld.append(subdir)

    console.print(f"Found {len(collection_directories)} collection directories (with .jsonld files)")

    # Warn about directories that might be missing context files.
    warnings.extend(
        f"⚠️ Directory '{subdir.name}' has .json files but no .jsonld context"
        for subdir in directories_with_json_but_no_jsonld
    )

    # Validate each collection; context-usage warnings are produced only for
    # collections that passed the basic structural validation.
    for subdir in collection_directories:
        console.print(f"📁 Testing collection: {subdir.name}")
        collection_errors = self._test_collection_directory(subdir)
        errors.extend(collection_errors)
        if not collection_errors:
            for warning in self._validate_context_usage(subdir, subdir.name):
                console.print(f" {warning}")

    # Check the YAML specification files if they exist.
    errors.extend(self._test_yaml_specs(repo_dir, collection_directories))

    # Display accumulated warnings before the summary.
    if warnings:
        console.print(f"\n[yellow]Warnings ({len(warnings)}):[/yellow]")
        for warning in warnings:
            console.print(f" {warning}")

    # Summary: any error fails the whole structure check.
    if errors:
        console.print(f"\n[red]❌ Repository structure validation failed with {len(errors)} errors:[/red]")
        for error in errors:
            console.print(f" {error}")
        return False

    console.print("\n[green]✅ Repository structure validation passed![/green]")
    console.print(f"✅ Validated {len(collection_directories)} collection directories")
    return True
335
+
336
def _test_collection_directory(self, directory: Path) -> List[str]:
    """Validate a single collection directory and return its error messages."""
    errors: List[str] = []

    entries = list(directory.iterdir())
    jsonld_files = [f for f in entries if f.name.endswith(".jsonld")]
    other_files = [f for f in entries if not f.name.endswith(".jsonld")]

    # Structural expectations: one .jsonld context plus element files.
    if not jsonld_files:
        errors.append(f"❌ {directory.name}: No .jsonld context file found")
    elif len(jsonld_files) > 1:
        console.print(f" [yellow]⚠️ Multiple .jsonld files: {[f.name for f in jsonld_files]}[/yellow]")

    if not other_files:
        errors.append(f"❌ {directory.name}: No element files found")

    # Each JSONLD context file must parse and declare id/type/@base inside a
    # dict-valued "@context".
    for jsonld_file in jsonld_files:
        try:
            with open(jsonld_file, "r", encoding="utf-8") as handle:
                jsonld_content = json.load(handle)
        except json.JSONDecodeError as e:
            errors.append(f"❌ {jsonld_file.name}: Invalid JSON syntax - {e}")
            continue
        except Exception as e:
            errors.append(f"❌ {jsonld_file.name}: Error reading file - {e}")
            continue

        if "@context" not in jsonld_content:
            errors.append(f"❌ {jsonld_file.name}: Missing '@context' field")
            continue

        context = jsonld_content["@context"]
        if not isinstance(context, dict):
            errors.append(f"❌ {jsonld_file.name}: '@context' must be a dictionary")
            continue

        missing_fields = [field for field in ["id", "type", "@base"] if field not in context]
        if missing_fields:
            errors.append(f"❌ {jsonld_file.name}: Missing required fields in @context: {missing_fields}")

    # Element files (.json) must parse and carry id/type/@context at top level.
    for element_file in (f for f in other_files if f.name.endswith(".json")):
        try:
            with open(element_file, "r", encoding="utf-8") as handle:
                element_content = json.load(handle)
        except json.JSONDecodeError as e:
            errors.append(f"❌ {element_file.name}: Invalid JSON syntax - {e}")
            continue
        except Exception as e:
            errors.append(f"❌ {element_file.name}: Error reading file - {e}")
            continue

        missing_fields = [field for field in ["id", "type", "@context"] if field not in element_content]
        if missing_fields:
            errors.append(f"❌ {element_file.name}: Missing required fields: {missing_fields}")

    if not errors:
        console.print(f" [green]✅ Collection '{directory.name}' passed validation[/green]")

    return errors
400
+
401
+ def _test_yaml_specs(self, repo_dir: Path, collection_directories: List[Path]) -> List[str]:
402
+ """Test YAML specification files (project_specs.yaml, drs_specs.yaml, catalog_spec.yaml, attr_specs.yaml)"""
403
+ errors = []
404
+
405
+ # Add clear section header
406
+ console.print(f"\n[bold blue]📋 Testing YAML Specification Files[/bold blue]")
407
+ console.print(f"[dim]Repository path: {repo_dir}[/dim]")
408
+
409
+ # Import constants and YAML handling
410
+ try:
411
+ from esgvoc.core.constants import (
412
+ PROJECT_SPECS_FILENAME,
413
+ DRS_SPECS_FILENAME,
414
+ CATALOG_SPECS_FILENAME,
415
+ ATTRIBUTES_SPECS_FILENAME
416
+ )
417
+ except ImportError as e:
418
+ error_msg = f"❌ Missing required esgvoc constants: {e}"
419
+ errors.append(error_msg)
420
+ console.print(f"[red]{error_msg}[/red]")
421
+ return errors
422
+
423
+ try:
424
+ import yaml
425
+ except ImportError:
426
+ error_msg = f"❌ PyYAML not installed. Install with: pip install PyYAML"
427
+ errors.append(error_msg)
428
+ console.print(f"[red]{error_msg}[/red]")
429
+ return errors
430
+
431
+ # Get existing collections for validation
432
+ existing_collections = {d.name for d in collection_directories}
433
+ source_collections = set()
434
+ # Track which files contain each collection reference for better error reporting
435
+ collection_file_mapping = {} # collection_name -> set of files that reference it
436
+ files_tested = 0
437
+
438
+ # Test project_specs.yaml
439
+ project_specs_file = repo_dir / PROJECT_SPECS_FILENAME
440
+ if project_specs_file.exists():
441
+ console.print(f"📄 Testing {PROJECT_SPECS_FILENAME}...")
442
+ try:
443
+ with open(project_specs_file, "r", encoding="utf-8") as f:
444
+ project_specs = yaml.safe_load(f)
445
+ console.print(f" [green]✅ {PROJECT_SPECS_FILENAME} parsed successfully[/green]")
446
+ files_tested += 1
447
+ except yaml.YAMLError as e:
448
+ error_msg = f"❌ {PROJECT_SPECS_FILENAME}: Invalid YAML syntax - {e}"
449
+ errors.append(error_msg)
450
+ console.print(f" [red]{error_msg}[/red]")
451
+ except Exception as e:
452
+ error_msg = f"❌ Error reading {PROJECT_SPECS_FILENAME}: {e}"
453
+ errors.append(error_msg)
454
+ console.print(f" [red]{error_msg}[/red]")
455
+ else:
456
+ error_msg = f"❌ Required file {PROJECT_SPECS_FILENAME} not found"
457
+ errors.append(error_msg)
458
+ console.print(f"📄 [red]{error_msg}[/red]")
459
+
460
+ # Test drs_specs.yaml
461
+ drs_specs_file = repo_dir / DRS_SPECS_FILENAME
462
+ if drs_specs_file.exists():
463
+ console.print(f"📄 Testing {DRS_SPECS_FILENAME}...")
464
+ try:
465
+ with open(drs_specs_file, "r", encoding="utf-8") as f:
466
+ drs_specs = yaml.safe_load(f)
467
+
468
+ # Extract collection references from DRS specs
469
+ for drs_name, drs_spec in drs_specs.items():
470
+ if isinstance(drs_spec, dict) and "parts" in drs_spec:
471
+ for part in drs_spec["parts"]:
472
+ if isinstance(part, dict):
473
+ # Handle both old format (collection_id) and new format (source_collection)
474
+ collection_ref = part.get("collection_id") or part.get("source_collection")
475
+ if collection_ref:
476
+ source_collections.add(collection_ref)
477
+ if collection_ref not in collection_file_mapping:
478
+ collection_file_mapping[collection_ref] = set()
479
+ collection_file_mapping[collection_ref].add(DRS_SPECS_FILENAME)
480
+
481
+ console.print(f" [green]✅ {DRS_SPECS_FILENAME} parsed successfully[/green]")
482
+ files_tested += 1
483
+ except yaml.YAMLError as e:
484
+ error_msg = f"❌ {DRS_SPECS_FILENAME}: Invalid YAML syntax - {e}"
485
+ errors.append(error_msg)
486
+ console.print(f" [red]{error_msg}[/red]")
487
+ except Exception as e:
488
+ error_msg = f"❌ Error reading {DRS_SPECS_FILENAME}: {e}"
489
+ errors.append(error_msg)
490
+ console.print(f" [red]{error_msg}[/red]")
491
+ else:
492
+ console.print(f" [yellow]⚠️ Optional file {DRS_SPECS_FILENAME} not found[/yellow]")
493
+
494
+ # Test catalog_spec.yaml (optional)
495
+ catalog_specs_file = repo_dir / CATALOG_SPECS_FILENAME
496
+ if catalog_specs_file.exists():
497
+ console.print(f"📄 Testing {CATALOG_SPECS_FILENAME}...")
498
+ try:
499
+ with open(catalog_specs_file, "r", encoding="utf-8") as f:
500
+ catalog_specs = yaml.safe_load(f)
501
+
502
+ # Extract collection references from catalog specs
503
+ if isinstance(catalog_specs, dict):
504
+ # Check dataset_properties and file_properties
505
+ for prop_type in ["dataset_properties", "file_properties"]:
506
+ if prop_type in catalog_specs and isinstance(catalog_specs[prop_type], list):
507
+ for prop in catalog_specs[prop_type]:
508
+ if isinstance(prop, dict) and "source_collection" in prop:
509
+ collection_ref = prop["source_collection"]
510
+ # Skip None values - collections can now be null in YAML
511
+ if collection_ref is not None:
512
+ source_collections.add(collection_ref)
513
+ if collection_ref not in collection_file_mapping:
514
+ collection_file_mapping[collection_ref] = set()
515
+ collection_file_mapping[collection_ref].add(CATALOG_SPECS_FILENAME)
516
+
517
+ console.print(f" [green]✅ {CATALOG_SPECS_FILENAME} parsed successfully[/green]")
518
+ files_tested += 1
519
+ except yaml.YAMLError as e:
520
+ error_msg = f"❌ {CATALOG_SPECS_FILENAME}: Invalid YAML syntax - {e}"
521
+ errors.append(error_msg)
522
+ console.print(f" [red]{error_msg}[/red]")
523
+ except Exception as e:
524
+ error_msg = f"❌ Error reading {CATALOG_SPECS_FILENAME}: {e}"
525
+ errors.append(error_msg)
526
+ console.print(f" [red]{error_msg}[/red]")
527
+ else:
528
+ console.print(f" [yellow]⚠️ Optional file {CATALOG_SPECS_FILENAME} not found[/yellow]")
529
+
530
+ # Test attr_specs.yaml (now ingested by esgvoc as confirmed by project_ingestion.py updates)
531
+ attr_specs_file = repo_dir / ATTRIBUTES_SPECS_FILENAME
532
+ if attr_specs_file.exists():
533
+ console.print(f"📄 Testing {ATTRIBUTES_SPECS_FILENAME}...")
534
+ try:
535
+ with open(attr_specs_file, "r", encoding="utf-8") as f:
536
+ attr_specs = yaml.safe_load(f)
537
+
538
+ # Extract collection references from attribute specs
539
+ if isinstance(attr_specs, list):
540
+ # New format: list of AttributeProperty objects
541
+ for attr_spec in attr_specs:
542
+ if isinstance(attr_spec, dict) and "source_collection" in attr_spec:
543
+ collection_ref = attr_spec["source_collection"]
544
+ # Skip None values - collections can now be null in YAML
545
+ if collection_ref is not None:
546
+ source_collections.add(collection_ref)
547
+ if collection_ref not in collection_file_mapping:
548
+ collection_file_mapping[collection_ref] = set()
549
+ collection_file_mapping[collection_ref].add(ATTRIBUTES_SPECS_FILENAME)
550
+ elif isinstance(attr_specs, dict):
551
+ # Legacy format: nested structure with "specs" key
552
+ if "specs" in attr_specs:
553
+ specs = attr_specs["specs"]
554
+ if isinstance(specs, dict):
555
+ for attr_name, attr_spec in specs.items():
556
+ if isinstance(attr_spec, dict) and "source_collection" in attr_spec:
557
+ collection_ref = attr_spec["source_collection"]
558
+ # Skip None values - collections can now be null in YAML
559
+ if collection_ref is not None:
560
+ source_collections.add(collection_ref)
561
+ if collection_ref not in collection_file_mapping:
562
+ collection_file_mapping[collection_ref] = set()
563
+ collection_file_mapping[collection_ref].add(ATTRIBUTES_SPECS_FILENAME)
564
+ elif isinstance(specs, list):
565
+ for attr_spec in specs:
566
+ if isinstance(attr_spec, dict) and "source_collection" in attr_spec:
567
+ collection_ref = attr_spec["source_collection"]
568
+ # Skip None values - collections can now be null in YAML
569
+ if collection_ref is not None:
570
+ source_collections.add(collection_ref)
571
+ if collection_ref not in collection_file_mapping:
572
+ collection_file_mapping[collection_ref] = set()
573
+ collection_file_mapping[collection_ref].add(ATTRIBUTES_SPECS_FILENAME)
574
+
575
+ console.print(f" [green]✅ {ATTRIBUTES_SPECS_FILENAME} parsed successfully[/green]")
576
+ files_tested += 1
577
+ except yaml.YAMLError as e:
578
+ error_msg = f"❌ {ATTRIBUTES_SPECS_FILENAME}: Invalid YAML syntax - {e}"
579
+ errors.append(error_msg)
580
+ console.print(f" [red]{error_msg}[/red]")
581
+ except Exception as e:
582
+ error_msg = f"❌ Error reading {ATTRIBUTES_SPECS_FILENAME}: {e}"
583
+ errors.append(error_msg)
584
+ console.print(f" [red]{error_msg}[/red]")
585
+ else:
586
+ console.print(f" [yellow]⚠️ Optional file {ATTRIBUTES_SPECS_FILENAME} not found[/yellow]")
587
+
588
+ # Validate collection references
589
+ console.print(f"\n📂 Validating collection references...")
590
+ if source_collections:
591
+ console.print(f" Found {len(source_collections)} source_collection references")
592
+
593
+ for collection in source_collections:
594
+ if collection not in existing_collections:
595
+ # Enhanced error message showing which files contain the reference
596
+ referencing_files = collection_file_mapping.get(collection, set())
597
+ files_list = ", ".join(sorted(referencing_files))
598
+ error_msg = f"❌ YAML specs reference non-existent collection: '{collection}' (referenced in: {files_list})"
599
+ errors.append(error_msg)
600
+ console.print(f" [red]{error_msg}[/red]")
601
+ else:
602
+ console.print(f" [green]✅ Reference '{collection}' exists[/green]")
603
+ else:
604
+ console.print(" [yellow]⚠️ No collection references found in YAML specs[/yellow]")
605
+
606
+ # Final YAML validation summary
607
+ console.print(f"\n📊 YAML Validation Summary:")
608
+ if files_tested == 0:
609
+ error_msg = "❌ No YAML specification files found"
610
+ errors.append(error_msg)
611
+ console.print(f" [red]{error_msg}[/red]")
612
+ else:
613
+ if errors:
614
+ console.print(f" [red]❌ {len(errors)} errors found in YAML files[/red]")
615
+ else:
616
+ console.print(f" [green]✅ All {files_tested} YAML specification files are valid[/green]")
617
+
618
+ console.print(f" [blue]Files tested: {files_tested}[/blue]")
619
+
620
+ return errors
621
+
622
    def _test_esgvoc_specs_ingestion(self, project_name: str, repo_dir: Path) -> List[str]:
        """Test that YAML specs are properly ingested into esgvoc and accessible via API.

        Compares the on-disk ``attr_specs`` YAML file with the specs exposed by the
        ingested esgvoc project object, and reports presence of ``drs_specs`` and
        ``catalog_specs``. Progress and diagnostics are printed to the console.

        Args:
            project_name: Name of the project to look up in esgvoc.
            repo_dir: Path to the project repository checkout containing the YAML specs.

        Returns:
            List[str]: Error messages (empty if the ingestion check passed).
        """
        errors = []

        try:
            # Import esgvoc API and constants
            import esgvoc.api as ev
            from esgvoc.core.constants import ATTRIBUTES_SPECS_FILENAME
        except ImportError as e:
            errors.append(f"❌ Cannot import esgvoc modules for ingestion testing: {e}")
            return errors

        try:
            import yaml
        except ImportError:
            errors.append(f"❌ PyYAML not installed. Install with: pip install PyYAML")
            return errors

        console.print(f"🔍 Testing esgvoc ingestion compatibility for {project_name}...")

        # Get the project specs from esgvoc
        try:
            project = ev.get_project(project_name)
            console.print(f"  [green]✅ Project '{project_name}' found in esgvoc[/green]")

            # NOTE(review): this requires BOTH attr_specs and drs_specs attributes to
            # exist on the project object before any spec is inspected — a project with
            # only one of them falls into the "no specs" branch below. Confirm intended.
            if hasattr(project, 'attr_specs') and hasattr(project, 'drs_specs'):
                # Project is properly loaded with specs - convert to dict format for compatibility
                specs = {}
                if hasattr(project, 'attr_specs') and project.attr_specs:
                    specs["attr_specs"] = project.attr_specs
                if hasattr(project, 'drs_specs') and project.drs_specs:
                    specs["drs_specs"] = project.drs_specs
                if hasattr(project, 'catalog_specs') and project.catalog_specs:
                    specs["catalog_specs"] = project.catalog_specs

                console.print(f"  [blue]📊 Project specs loaded with keys: {list(specs.keys())}[/blue]")

                # Test attr_specs ingestion specifically
                attr_specs_file = repo_dir / ATTRIBUTES_SPECS_FILENAME
                if attr_specs_file.exists() and "attr_specs" in specs:
                    console.print(f"  [green]✅ attr_specs found in ingested project data[/green]")

                    # Load the original YAML for comparison
                    with open(attr_specs_file, "r", encoding="utf-8") as f:
                        original_attr_specs = yaml.safe_load(f)

                    ingested_attr_specs = specs["attr_specs"]

                    # Validate structure compatibility (only the list/list case is
                    # deep-compared; any other type combination is a soft warning)
                    if isinstance(original_attr_specs, list) and isinstance(ingested_attr_specs, list):
                        console.print(f"  [green]✅ attr_specs structure matches: {len(original_attr_specs)} items in YAML, {len(ingested_attr_specs)} items ingested[/green]")

                        # Check for source_collection fields
                        yaml_collections = set()
                        ingested_collections = set()

                        for item in original_attr_specs:
                            if isinstance(item, dict) and "source_collection" in item:
                                collection_ref = item["source_collection"]
                                # Skip None values - collections can now be null in YAML
                                if collection_ref is not None:
                                    yaml_collections.add(collection_ref)

                        # Ingested items may be plain dicts or Pydantic models, so
                        # check both mapping access and attribute access.
                        for item in ingested_attr_specs:
                            if isinstance(item, dict) and "source_collection" in item:
                                collection_ref = item["source_collection"]
                                if collection_ref is not None:
                                    ingested_collections.add(collection_ref)
                            elif hasattr(item, "source_collection"):
                                # Handle Pydantic model objects
                                collection_ref = item.source_collection
                                if collection_ref is not None:
                                    ingested_collections.add(collection_ref)

                        if yaml_collections == ingested_collections:
                            console.print(f"  [green]✅ Collection references preserved: {sorted(yaml_collections)}[/green]")
                        else:
                            errors.append(f"❌ Collection reference mismatch - YAML: {sorted(yaml_collections)}, Ingested: {sorted(ingested_collections)}")
                    else:
                        console.print(f"  [yellow]⚠️ Structure difference: YAML type={type(original_attr_specs)}, Ingested type={type(ingested_attr_specs)}[/yellow]")

                elif attr_specs_file.exists():
                    console.print(f"  [yellow]⚠️ attr_specs.yaml exists but not found in ingested project specs[/yellow]")

                # Test drs_specs ingestion
                if "drs_specs" in specs:
                    console.print(f"  [green]✅ drs_specs found in ingested project data[/green]")
                else:
                    console.print(f"  [yellow]⚠️ drs_specs not found in ingested project data (may be optional)[/yellow]")

                # Test catalog_specs ingestion
                if "catalog_specs" in specs:
                    console.print(f"  [green]✅ catalog_specs found in ingested project data[/green]")
                else:
                    console.print(f"  [yellow]⚠️ catalog_specs not found in ingested project data (may be optional)[/yellow]")

            else:
                # More detailed error message about missing specs
                expected_specs = ["project_specs (required)", "attr_specs (optional)", "drs_specs (optional)", "catalog_specs (optional)"]
                console.print(f"  [yellow]⚠️ Project '{project_name}' has no specs attributes. Expected specs: {', '.join(expected_specs)}[/yellow]")

        except Exception as e:
            # Broad catch: any failure while talking to esgvoc is reported as a
            # single error rather than aborting the whole test run.
            errors.append(f"❌ Failed to retrieve project '{project_name}' from esgvoc: {e}")

        return errors
727
+
728
    def _debug_missing_term(self, project_name: str, collection_name: str, term_id: str, repo_path: str = "."):
        """
        Provide detailed debugging information for a missing term.

        Performs three diagnostic passes, printing findings to the console:
          1. Look for the term file in the project repository (and, failing that,
             search sibling JSON files for a matching ``id`` field).
          2. Attempt to resolve the term in the universe repository via DataMerger,
             with path inference and similar-filename fallbacks.
          3. Query the term through the esgvoc API (project and universe).

        Args:
            project_name: Name of the project
            collection_name: Name of the collection
            term_id: ID of the missing term
            repo_path: Path to the repository

        Returns:
            None. All output is written to the console.
        """
        console.print(f"\n[bold yellow]🔍 Debugging missing term: {term_id} in {collection_name}[/bold yellow]")

        repo_dir = Path(repo_path)
        collection_dir = repo_dir / collection_name

        # 1. Check if term exists in project repository
        term_file = collection_dir / f"{term_id}.json"
        console.print(f"\n[blue]📁 Project Repository ({project_name}):[/blue]")

        if term_file.exists():
            try:
                with open(term_file, "r", encoding="utf-8") as f:
                    term_content = json.load(f)
                console.print(f"  [green]✅ Term found in project: {term_file}[/green]")
                console.print("  [dim]Content:[/dim]")
                formatted_json = json.dumps(term_content, indent=2, ensure_ascii=False)
                for line in formatted_json.split("\n"):
                    console.print(f"    {line}")
            except Exception as e:
                console.print(f"  [red]❌ Error reading term file: {e}[/red]")
        else:
            console.print(f"  [red]❌ Term not found in project: {term_file}[/red]")

            # Try to find the term by searching for files that contain this term_id
            # (the filename may not match the term's "id" field).
            console.print(f"  [dim]Searching for files containing term ID '{term_id}'...[/dim]")
            try:
                for json_file in collection_dir.glob("*.json"):
                    if json_file.name.endswith(".jsonld"):
                        continue
                    try:
                        with open(json_file, "r", encoding="utf-8") as f:
                            content = json.load(f)
                        if content.get("id") == term_id:
                            console.print(f"  [yellow]📄 Found term ID '{term_id}' in file: {json_file.name}[/yellow]")
                            console.print(f"  [dim]Note: Filename '{json_file.name}' ≠ expected '{term_id}.json'[/dim]")
                            console.print("  [dim]Content:[/dim]")
                            formatted_json = json.dumps(content, indent=2, ensure_ascii=False)
                            for line in formatted_json.split("\n"):
                                console.print(f"    {line}")
                            break
                    except Exception:
                        continue
                else:
                    # for/else: runs only when the loop completed without break,
                    # i.e. no file carried the requested term ID.
                    console.print(f"  [dim]No file found containing term ID '{term_id}'[/dim]")
            except Exception as e:
                console.print(f"  [dim]Error searching for term: {e}[/dim]")

        # 2. Check if term exists in universe (using DataMerger to resolve links)
        try:
            current_state = service.get_state()
            if hasattr(current_state, "universe") and current_state.universe.local_path:
                universe_dir = Path(current_state.universe.local_path)

                console.print(f"\n[blue]🌌 Universe Repository (resolved via DataMerger):[/blue]")

                # First, try to use DataMerger to resolve the universe term if project term exists
                resolved_universe_term = None
                universe_term_path = None
                project_term_content = None

                if term_file.exists():
                    try:
                        # First, read the project term to see what it links to
                        with open(term_file, "r", encoding="utf-8") as f:
                            project_term_content = json.load(f)

                        from esgvoc.core.data_handler import JsonLdResource
                        from esgvoc.core.service.data_merger import DataMerger

                        # Use DataMerger to resolve the universe term like in project_ingestion.py
                        locally_avail = {
                            "https://espri-mod.github.io/mip-cmor-tables": str(current_state.universe.local_path)
                        }

                        console.print(f"  [dim]Attempting DataMerger resolution...[/dim]")

                        # Check if project term has an @id link
                        if "@id" in project_term_content:
                            console.print(f"  [dim]Project term @id: {project_term_content['@id']}[/dim]")

                            # Calculate expected universe path from the @id URL
                            if "https://espri-mod.github.io/mip-cmor-tables" in project_term_content["@id"]:
                                universe_relative_path = project_term_content["@id"].replace(
                                    "https://espri-mod.github.io/mip-cmor-tables/", ""
                                )
                                if not universe_relative_path.endswith(".json"):
                                    universe_relative_path += ".json"
                                universe_term_path = universe_dir / universe_relative_path
                                console.print(f"  [dim]Expected universe path: {universe_term_path}[/dim]")
                        else:
                            console.print(f"  [dim]Project term has no @id link to universe[/dim]")
                            # Even without @id, try to infer the universe path from context base
                            try:
                                # Read the context file to get the base
                                context_file = term_file.parent / "000_context.jsonld"
                                if context_file.exists():
                                    with open(context_file, "r", encoding="utf-8") as f:
                                        context_content = json.load(f)

                                    base_url = context_content.get("@context", {}).get("@base", "")
                                    if base_url and "https://espri-mod.github.io/mip-cmor-tables" in base_url:
                                        universe_relative_path = (
                                            base_url.replace("https://espri-mod.github.io/mip-cmor-tables/", "")
                                            + f"{term_id}.json"
                                        )
                                        universe_term_path = universe_dir / universe_relative_path
                                        console.print(f"  [dim]Inferred from context @base: {universe_term_path}[/dim]")
                            except Exception as e:
                                console.print(f"  [dim]Could not infer universe path from context: {e}[/dim]")

                        # Debug: Check what the JsonLdResource expansion produces
                        json_resource = JsonLdResource(uri=str(term_file))
                        console.print(f"  [dim]JSON-LD expanded form: {json_resource.expanded}[/dim]")

                        merger_result = DataMerger(
                            data=json_resource,
                            locally_available=locally_avail,
                        ).merge_linked_json()

                        if merger_result and len(merger_result) > 1:
                            # If we have more than one result, the last one is the fully merged term
                            resolved_universe_term = merger_result[-1]

                            console.print(f"  [green]✅ Term resolved via DataMerger (merged from universe)[/green]")
                            if universe_term_path:
                                console.print(f"  [dim]Resolved universe path: {universe_term_path}[/dim]")
                            console.print(
                                f"  [dim]Universe file exists: {universe_term_path.exists() if universe_term_path else 'N/A'}[/dim]"
                            )
                            console.print("  [dim]Merged content:[/dim]")
                            formatted_json = json.dumps(resolved_universe_term, indent=2, ensure_ascii=False)
                            for line in formatted_json.split("\n"):
                                console.print(f"    {line}")
                        else:
                            console.print(
                                f"  [yellow]⚠️ No universe term linked from project term (merge result length: {len(merger_result) if merger_result else 0})[/yellow]"
                            )

                    except Exception as e:
                        console.print(f"  [red]❌ Error using DataMerger to resolve universe term: {e}[/red]")
                        # Still show what the project term was trying to link to
                        if project_term_content and "@id" in project_term_content:
                            console.print(
                                f"  [dim]Project term was trying to link to: {project_term_content['@id']}[/dim]"
                            )
                            universe_relative_path = project_term_content["@id"].replace(
                                "https://espri-mod.github.io/mip-cmor-tables/", ""
                            )
                            if not universe_relative_path.endswith(".json"):
                                universe_relative_path += ".json"
                            universe_term_path = universe_dir / universe_relative_path
                            console.print(
                                f"  [dim]Expected universe file: {universe_term_path} (exists: {universe_term_path.exists() if universe_term_path else False})[/dim]"
                            )

                # Fallback: also check direct universe path and show resolved universe file if it was calculated
                if not resolved_universe_term:
                    # Show the resolved path from DataMerger if we have it
                    if universe_term_path and universe_term_path.exists():
                        try:
                            with open(universe_term_path, "r", encoding="utf-8") as f:
                                universe_term_content = json.load(f)
                            console.print(
                                f"  [green]✅ Universe file found at resolved path: {universe_term_path}[/green]"
                            )
                            console.print("  [dim]Content:[/dim]")
                            formatted_json = json.dumps(universe_term_content, indent=2, ensure_ascii=False)
                            for line in formatted_json.split("\n"):
                                console.print(f"    {line}")
                        except Exception as e:
                            console.print(f"  [red]❌ Error reading resolved universe file: {e}[/red]")
                    else:
                        # Show detailed path info - don't try direct collection path since it's wrong
                        console.print(f"  [red]❌ Term not found in universe:[/red]")
                        if universe_term_path:
                            console.print(
                                f"  [dim]• DataMerger resolved path: {universe_term_path} (exists: {universe_term_path.exists()})[/dim]"
                            )

                        # Try direct collection-based path as fallback (but note this may be incorrect for project collections vs universe structure)
                        universe_collection_dir = universe_dir / collection_name
                        universe_term_file = universe_collection_dir / f"{term_id}.json"
                        console.print(
                            f"  [dim]• Direct collection path: {universe_term_file} (exists: {universe_term_file.exists()})[/dim]"
                        )

                        # Try to find similar files in the universe to help debugging
                        try:
                            if universe_term_path:
                                parent_dir = universe_term_path.parent
                                if parent_dir.exists():
                                    similar_files = [
                                        f.name
                                        for f in parent_dir.iterdir()
                                        if f.is_file() and f.suffix == ".json" and term_id.lower() in f.name.lower()
                                    ]
                                    if similar_files:
                                        console.print(
                                            f"  [dim]• Similar files in {parent_dir.name}: {similar_files}[/dim]"
                                        )

                                    # Also check if there are files with different casing
                                    all_files = [
                                        f.name for f in parent_dir.iterdir() if f.is_file() and f.suffix == ".json"
                                    ]
                                    casing_matches = [f for f in all_files if f.lower() == f"{term_id.lower()}.json"]
                                    if casing_matches and casing_matches[0] != f"{term_id}.json":
                                        console.print(
                                            f"  [dim]• Case mismatch found: {casing_matches[0]} vs {term_id}.json[/dim]"
                                        )
                        except Exception:
                            # Best-effort suggestion pass; failures here are not useful to report.
                            pass
            else:
                console.print(f"  [yellow]⚠️ Universe path not available[/yellow]")
        except Exception as e:
            console.print(f"  [red]❌ Error accessing universe: {e}[/red]")

        # 3. Try to query the term via esgvoc API
        console.print(f"\n[blue]🔗 ESGVoc API Query:[/blue]")
        try:
            import esgvoc.api as ev

            # Try to get the term from project
            try:
                project_terms = ev.get_all_terms_in_collection(project_name, collection_name)
                matching_terms = [term for term in project_terms if term.id == term_id]
                if matching_terms:
                    term = matching_terms[0]
                    console.print(f"  [green]✅ Term found in esgvoc project API[/green]")
                    console.print(f"    ID: {term.id}")
                    console.print(f"    Type: {term.type}")
                    console.print(f"    Label: {getattr(term, 'label', 'N/A')}")
                    console.print(f"    Description: {getattr(term, 'description', 'N/A')[:100]}...")
                else:
                    console.print(f"  [red]❌ Term not found in esgvoc project API[/red]")
            except Exception as e:
                console.print(f"  [red]❌ Error querying project API: {e}[/red]")

            # Try to get the term from universe (if available)
            try:
                universe_terms = ev.get_all_terms_in_collection("universe", collection_name)
                matching_universe_terms = [term for term in universe_terms if term.id == term_id]
                if matching_universe_terms:
                    term = matching_universe_terms[0]
                    console.print(f"  [green]✅ Term found in esgvoc universe API[/green]")
                    console.print(f"    ID: {term.id}")
                    console.print(f"    Type: {term.type}")
                    console.print(f"    Label: {getattr(term, 'label', 'N/A')}")
                    console.print(f"    Description: {getattr(term, 'description', 'N/A')[:100]}...")
                else:
                    console.print(f"  [red]❌ Term not found in esgvoc universe API[/red]")
            except Exception as e:
                console.print(f"  [red]❌ Error querying universe API: {e}[/red]")

        except Exception as e:
            console.print(f"  [red]❌ Error importing esgvoc API: {e}[/red]")
994
+
995
+ def _validate_context_usage(self, collection_dir: Path, collection_name: str) -> list:
996
+ """
997
+ Validate context usage and detect potential issues.
998
+
999
+ Returns:
1000
+ list: List of warning messages
1001
+ """
1002
+ warnings = []
1003
+
1004
+ try:
1005
+ context_file = collection_dir / "000_context.jsonld"
1006
+ if not context_file.exists():
1007
+ return warnings
1008
+
1009
+ # Read context
1010
+ with open(context_file, "r", encoding="utf-8") as f:
1011
+ context_data = json.load(f)
1012
+
1013
+ context_mappings = context_data.get("@context", {})
1014
+ if not isinstance(context_mappings, dict):
1015
+ return warnings
1016
+
1017
+ # Get all JSON term files
1018
+ term_files = [f for f in collection_dir.glob("*.json") if not f.name.endswith(".jsonld")]
1019
+
1020
+ # Track context key usage
1021
+ context_keys_used = set()
1022
+ term_properties_used = set()
1023
+ terms_using_base_expansion = []
1024
+
1025
+ for term_file in term_files:
1026
+ try:
1027
+ with open(term_file, "r", encoding="utf-8") as f:
1028
+ term_content = json.load(f)
1029
+
1030
+ # Check what properties and values are used in the term
1031
+ for key, value in term_content.items():
1032
+ if key not in ["@context", "@id", "@type"]:
1033
+ term_properties_used.add(key)
1034
+
1035
+ # Check if this property has a shortcut in context
1036
+ if key in context_mappings:
1037
+ context_keys_used.add(key)
1038
+
1039
+ # Check if property values use context shortcuts
1040
+ # For example: "type": "source" where context has "source": "https://..."
1041
+ if isinstance(value, str) and value in context_mappings:
1042
+ context_keys_used.add(value)
1043
+
1044
+ # Check if term relies on @base expansion (has simple id but no explicit @id)
1045
+ term_id = term_content.get("id", term_file.stem)
1046
+ if "id" in term_content and "@id" not in term_content and "@base" in context_mappings:
1047
+ terms_using_base_expansion.append({"file": term_file.name, "id": term_id})
1048
+
1049
+ except Exception as e:
1050
+ continue
1051
+
1052
+ # Check for unused context keys (excluding standard JSON-LD keys)
1053
+ standard_keys = {"@base", "@vocab", "@language", "@version", "id", "type"}
1054
+ defined_keys = set(context_mappings.keys()) - standard_keys
1055
+ unused_keys = defined_keys - context_keys_used
1056
+
1057
+ if unused_keys:
1058
+ warnings.append(f"⚠️ Context defines unused keys in '{collection_name}': {sorted(unused_keys)}")
1059
+
1060
+ # Check for properties without shortcuts
1061
+ properties_without_shortcuts = term_properties_used - context_keys_used - {"id", "type"}
1062
+ if properties_without_shortcuts:
1063
+ warnings.append(
1064
+ f"⚠️ Properties used without context shortcuts in '{collection_name}': {sorted(properties_without_shortcuts)}"
1065
+ )
1066
+
1067
+ # Check for filename/ID mismatches
1068
+ filename_id_mismatches = []
1069
+ for term_file in term_files:
1070
+ try:
1071
+ with open(term_file, "r", encoding="utf-8") as f:
1072
+ term_content = json.load(f)
1073
+
1074
+ expected_id = term_file.stem # filename without .json extension
1075
+ actual_id = term_content.get("id")
1076
+
1077
+ if actual_id and actual_id != expected_id:
1078
+ filename_id_mismatches.append(
1079
+ {"file": term_file.name, "expected_id": expected_id, "actual_id": actual_id}
1080
+ )
1081
+ except Exception:
1082
+ continue
1083
+
1084
+ if filename_id_mismatches:
1085
+ warnings.append(f"⚠️ Filename/ID mismatches in '{collection_name}':")
1086
+ for mismatch in filename_id_mismatches[:5]: # Show first 5
1087
+ warnings.append(
1088
+ f" • {mismatch['file']}: id='{mismatch['actual_id']}' (expected '{mismatch['expected_id']}')"
1089
+ )
1090
+ if len(filename_id_mismatches) > 5:
1091
+ warnings.append(f" • ... and {len(filename_id_mismatches) - 5} more mismatches")
1092
+
1093
+ # Base expansion is normal JSON-LD behavior - only report if there might be issues
1094
+ # For now, we'll skip this since @base expansion is the expected pattern
1095
+
1096
+ # Only warn about @base vs shortcuts if they're used for the same purpose
1097
+ # @base is for term identity URLs, shortcuts are for property/type values - this is normal
1098
+ # We could add more sophisticated conflict detection here if needed
1099
+
1100
+ except Exception as e:
1101
+ warnings.append(f"⚠️ Error validating context usage in '{collection_name}': {e}")
1102
+
1103
+ return warnings
1104
+
1105
+ def _validate_universe_warnings(self) -> bool:
1106
+ """
1107
+ Validate universe repository for potential issues and display warnings.
1108
+
1109
+ Returns:
1110
+ bool: True if universe validation completed (warnings don't fail the test)
1111
+ """
1112
+ try:
1113
+ current_state = service.get_state()
1114
+ if not hasattr(current_state, "universe") or not current_state.universe.local_path:
1115
+ console.print(f"[dim]⚠️ Universe path not available for validation[/dim]")
1116
+ return True
1117
+
1118
+ universe_dir = Path(current_state.universe.local_path)
1119
+ if not universe_dir.exists():
1120
+ console.print(f"[dim]⚠️ Universe directory not found: {universe_dir}[/dim]")
1121
+ return True
1122
+
1123
+ console.print(f"[blue]🌌 Validating Universe Repository: {universe_dir.name}[/blue]")
1124
+
1125
+ # Find universe collections (directories with JSON files)
1126
+ universe_collections = []
1127
+ for item in universe_dir.iterdir():
1128
+ if item.is_dir():
1129
+ json_files = list(item.glob("*.json"))
1130
+ jsonld_files = [f for f in json_files if f.name.endswith(".jsonld")]
1131
+ regular_json_files = [f for f in json_files if not f.name.endswith(".jsonld")]
1132
+
1133
+ if regular_json_files:
1134
+ universe_collections.append(item)
1135
+
1136
+ console.print(f"Found {len(universe_collections)} universe collections to validate")
1137
+
1138
+ total_warnings = 0
1139
+ for collection_dir in universe_collections:
1140
+ warnings = self._validate_context_usage(collection_dir, collection_dir.name)
1141
+ if warnings:
1142
+ console.print(f"📁 Universe collection '{collection_dir.name}':")
1143
+ for warning in warnings:
1144
+ console.print(f" {warning}")
1145
+ total_warnings += 1
1146
+
1147
+ if total_warnings == 0:
1148
+ console.print("✅ No validation warnings found in universe")
1149
+ else:
1150
+ console.print(f"⚠️ Found {total_warnings} validation warnings in universe")
1151
+
1152
+ console.print("") # Add spacing before project validation
1153
+ return True
1154
+
1155
+ except Exception as e:
1156
+ console.print(f"[red]❌ Error validating universe: {e}[/red]")
1157
+ return True # Don't fail the test for universe validation errors
1158
+
1159
+ def test_esgvoc_api_access(self, project_name: str, repo_path: str = ".") -> bool:
1160
+ """
1161
+ Test that all repository collections and elements are queryable via esgvoc API
1162
+
1163
+ Args:
1164
+ project_name: Name of the project being tested
1165
+ repo_path: Path to the repository (default: current directory)
1166
+
1167
+ Returns:
1168
+ bool: True if all API tests pass
1169
+ """
1170
+ console.print(f"[blue]🔍 Testing esgvoc API access for project: {project_name}[/blue]")
1171
+
1172
+ try:
1173
+ import esgvoc.api as ev
1174
+ except ImportError as e:
1175
+ console.print(f"[red]❌ Cannot import esgvoc.api: {e}[/red]")
1176
+ return False
1177
+
1178
+ repo_dir = Path(repo_path)
1179
+ errors = []
1180
+
1181
+ # Test 1: Verify project exists in esgvoc
1182
+ try:
1183
+ projects = ev.get_all_projects()
1184
+ if project_name not in projects:
1185
+ errors.append(f"❌ Project '{project_name}' not found in esgvoc. Available: {projects}")
1186
+ return False
1187
+ console.print(f"[green]✅ Project '{project_name}' found in esgvoc[/green]")
1188
+ except Exception as e:
1189
+ errors.append(f"❌ Failed to get projects from esgvoc: {e}")
1190
+ return False
1191
+
1192
+ # Get repository collections
1193
+ repo_collections = []
1194
+ all_directories = [p for p in repo_dir.iterdir() if p.is_dir()]
1195
+ for directory in all_directories:
1196
+ files_in_dir = list(directory.iterdir())
1197
+ jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
1198
+ if len(jsonld_files) > 0:
1199
+ repo_collections.append(directory.name)
1200
+
1201
+ # Test 2: Get collections from esgvoc
1202
+ try:
1203
+ # Debug: Check active configuration during API test
1204
+ current_active = service.get_config_manager().get_active_config_name()
1205
+ console.print(f"[dim]Debug: Active config during API test: {current_active}[/dim]")
1206
+
1207
+ esgvoc_collections = ev.get_all_collections_in_project(project_name)
1208
+ console.print(
1209
+ f"Found {len(esgvoc_collections)} collections in esgvoc, {len(repo_collections)} in repository"
1210
+ )
1211
+ except ValidationError as e:
1212
+ # Enhanced error reporting for Pydantic validation errors
1213
+ error_msg = f"❌ Validation error while processing collections for project '{project_name}'"
1214
+
1215
+ # Try to extract more context from the error
1216
+ if hasattr(e, "errors") and e.errors():
1217
+ for error in e.errors():
1218
+ if "input" in error and "ctx" in error:
1219
+ error_msg += f"\n • Invalid value: '{error['input']}'"
1220
+ if "enum_values" in error["ctx"]:
1221
+ error_msg += f"\n • Expected one of: {error['ctx']['enum_values']}"
1222
+ if error.get("type") == "enum":
1223
+ error_msg += f"\n • Field: {error.get('loc', 'unknown')}"
1224
+
1225
+ errors.append(error_msg)
1226
+ console.print(f"[red]{error_msg}[/red]")
1227
+ console.print(f"[dim]Full error details: {str(e)}[/dim]")
1228
+ return False
1229
+ except ValueError as e:
1230
+ # Enhanced error reporting for database validation issues
1231
+ error_str = str(e)
1232
+ if "collections with empty term_kind" in error_str:
1233
+ console.print(f"[red]❌ Database validation error for project '{project_name}':[/red]")
1234
+ console.print(f"[red]{error_str}[/red]")
1235
+ errors.append(f"❌ Invalid termkind values in database for project '{project_name}'")
1236
+ else:
1237
+ errors.append(f"❌ Failed to get collections from esgvoc: {e}")
1238
+ console.print(f"[red]API Error Details: {e}[/red]")
1239
+ return False
1240
+ except Exception as e:
1241
+ errors.append(f"❌ Failed to get collections from esgvoc: {e}")
1242
+ console.print(f"[red]API Error Details: {e}[/red]")
1243
+ return False
1244
+
1245
+ # Test 3: Verify each repository collection is queryable
1246
+ missing_in_esgvoc = []
1247
+ for collection_name in repo_collections:
1248
+ if collection_name not in esgvoc_collections:
1249
+ missing_in_esgvoc.append(collection_name)
1250
+ else:
1251
+ console.print(f" [green]✅ Collection '{collection_name}' found in esgvoc[/green]")
1252
+
1253
+ if missing_in_esgvoc:
1254
+ errors.append(f"❌ Collections in repository but not in esgvoc: {missing_in_esgvoc}")
1255
+
1256
+ # Test 4: Test elements in each collection
1257
+ for collection_name in repo_collections:
1258
+ if collection_name in esgvoc_collections:
1259
+ console.print(f"📂 Testing elements in collection: {collection_name}")
1260
+
1261
+ # Get repository elements
1262
+ collection_dir = repo_dir / collection_name
1263
+ json_files = [
1264
+ f for f in collection_dir.iterdir() if f.name.endswith(".json") and not f.name.endswith(".jsonld")
1265
+ ]
1266
+
1267
+ repo_elements = []
1268
+ repo_element_sources = {} # Track where each ID comes from
1269
+ for json_file in json_files:
1270
+ try:
1271
+ with open(json_file, "r", encoding="utf-8") as f:
1272
+ content = json.load(f)
1273
+ element_id = content.get("id", json_file.stem)
1274
+ repo_elements.append(element_id)
1275
+ repo_element_sources[element_id] = {"file": json_file.name, "from_id_field": "id" in content}
1276
+ except:
1277
+ element_id = json_file.stem
1278
+ repo_elements.append(element_id)
1279
+ repo_element_sources[element_id] = {"file": json_file.name, "from_id_field": False}
1280
+
1281
+ # Get esgvoc elements
1282
+ try:
1283
+ esgvoc_terms = ev.get_all_terms_in_collection(project_name, collection_name)
1284
+ esgvoc_element_ids = [term.id for term in esgvoc_terms]
1285
+
1286
+ console.print(f" Repository: {len(repo_elements)}, ESGVoc: {len(esgvoc_element_ids)} elements")
1287
+
1288
+ missing_elements = [elem for elem in repo_elements if elem not in esgvoc_element_ids]
1289
+ if missing_elements:
1290
+ errors.append(
1291
+ f"❌ Collection '{collection_name}': Elements missing from esgvoc: {missing_elements}"
1292
+ )
1293
+
1294
+ # Debug missing elements source tracking
1295
+ if self.debug_missing_terms:
1296
+ console.print(f" [dim]Missing elements and their sources:[/dim]")
1297
+ for elem in missing_elements:
1298
+ source_info = repo_element_sources.get(
1299
+ elem, {"file": "unknown", "from_id_field": False}
1300
+ )
1301
+ id_source = "id field" if source_info["from_id_field"] else "filename"
1302
+ console.print(f" [dim] • {elem} (from {source_info['file']} {id_source})[/dim]")
1303
+
1304
+ # Detailed debugging for each missing element (if enabled)
1305
+ if self.debug_missing_terms:
1306
+ console.print(
1307
+ f"\n[bold red]📋 Detailed analysis of missing elements in '{collection_name}':[/bold red]"
1308
+ )
1309
+ for missing_element in missing_elements:
1310
+ self._debug_missing_term(project_name, collection_name, missing_element, repo_path)
1311
+ else:
1312
+ console.print(f"[dim]💡 Use --debug-terms for detailed analysis of missing elements[/dim]")
1313
+ else:
1314
+ console.print(f" [green]✅ All elements in '{collection_name}' are queryable[/green]")
1315
+
1316
+ except Exception as e:
1317
+ # Try to identify which specific term is failing
1318
+ error_msg = f"❌ Failed to get terms from collection '{collection_name}': {e}"
1319
+
1320
+ # Attempt to identify the failing term by testing each one individually
1321
+ try:
1322
+ console.print(f" [yellow]⚠️ Attempting to identify failing term...[/yellow]")
1323
+ for repo_elem in repo_elements:
1324
+ try:
1325
+ ev.get_term_in_collection(project_name, collection_name, repo_elem)
1326
+ except Exception as term_error:
1327
+ error_msg += f"\n → Failing term: '{repo_elem}' - {term_error}"
1328
+ break
1329
+ except:
1330
+ pass # If we can't identify the specific term, just use the original error
1331
+
1332
+ errors.append(error_msg)
1333
+
1334
+ # Test 5: General API functions
1335
+ try:
1336
+ all_terms = ev.get_all_terms_in_all_projects()
1337
+ console.print(f"[blue]📊 ESGVoc API returned {len(all_terms)} total terms across all projects[/blue]")
1338
+ except Exception as e:
1339
+ errors.append(f"❌ Failed to get all terms from esgvoc: {e}")
1340
+
1341
+ # Summary
1342
+ if errors:
1343
+ console.print(f"\n[red]❌ ESGVoc API validation failed with {len(errors)} errors:[/red]")
1344
+ for error in errors:
1345
+ console.print(f" {error}")
1346
+ return False
1347
+ else:
1348
+ console.print("\n[green]✅ ESGVoc API validation passed![/green]")
1349
+ console.print(f"✅ Validated {len(repo_collections)} collections")
1350
+ console.print("✅ All repository elements accessible through esgvoc API")
1351
+ return True
1352
+
1353
    def run_complete_test(
        self,
        project_name: str,
        repo_url: str = None,
        branch: str = None,
        repo_path: str = None,
        esgvoc_branch: str = None,
        universe_branch: str = None,
    ) -> bool:
        """
        Run complete CV testing pipeline.

        Pipeline order: configure esgvoc -> synchronize CVs -> validate universe
        warnings -> locate the CV repository on disk -> structure test -> YAML
        specs ingestion test -> esgvoc API access test.

        Args:
            project_name: Name of the project to test
            repo_url: Custom repository URL (optional)
            branch: Custom branch (optional)
            repo_path: Path to repository for structure testing (optional - auto-detected if not provided)
            esgvoc_branch: ESGVoc library branch (for info only)
            universe_branch: Custom universe branch (optional)

        Returns:
            bool: True if all tests pass
        """
        console.print(f"[bold blue]🚀 Starting complete CV test for project: {project_name}[/bold blue]")

        success = True

        # Step 1: Configure esgvoc.
        # Configuration failure is fatal: no later step can run without it.
        if not self.configure_for_testing(project_name, repo_url, branch, esgvoc_branch, universe_branch):
            return False

        # Step 2: Synchronize CVs.
        # Sync failure is recorded but does not abort: later steps may still
        # produce useful diagnostics against whatever data is present.
        if not self.synchronize_cvs():
            success = False

        # Step 2.5: Validate universe for warnings (informational; does not
        # affect the success flag).
        self._validate_universe_warnings()

        # Step 3: Determine repository path AFTER synchronization - use downloaded CV repository if not specified
        if repo_path is None:
            # Preferred source: the state service, which knows where the
            # project repository was cloned during synchronization.
            try:
                current_state = service.get_state()
                if hasattr(current_state, "projects") and project_name in current_state.projects:
                    project_state = current_state.projects[project_name]
                    if hasattr(project_state, "local_path") and project_state.local_path:
                        repo_path = str(project_state.local_path)
                        console.print(f"[blue]Using CV repository from state service: {repo_path}[/blue]")
                    else:
                        console.print("[dim]Debug: Project state has no local_path[/dim]")
                else:
                    console.print(f"[dim]Debug: Project {project_name} not found in state service projects[/dim]")
                    console.print(
                        f"[dim]Debug: Available projects in state: {list(current_state.projects.keys()) if hasattr(current_state, 'projects') else 'No projects'}[/dim]"
                    )
            except Exception as e:
                console.print(f"[dim]Debug: Error accessing state service: {e}[/dim]")

            # Fallback: try to find the repository using the known default local path
            if repo_path is None:
                try:
                    from esgvoc.core.service.configuration.setting import ServiceSettings

                    default_configs = ServiceSettings._get_default_project_configs()
                    if project_name in default_configs:
                        default_local_path = default_configs[project_name]["local_path"]
                        config_manager = service.get_config_manager()

                        # Try different path constructions to find where the repository actually is
                        possible_paths = [
                            config_manager.data_config_dir / default_local_path,
                            config_manager.data_dir / self.test_config_name / default_local_path,
                            config_manager.data_dir / default_local_path,
                        ]

                        # Also check in other configuration directories
                        if config_manager.data_dir.exists():
                            for config_dir in config_manager.data_dir.iterdir():
                                if config_dir.is_dir():
                                    possible_repo_path = config_dir / default_local_path
                                    if possible_repo_path.exists():
                                        possible_paths.append(possible_repo_path)

                        # First existing candidate wins.
                        for path in possible_paths:
                            if path and path.exists():
                                repo_path = str(path)
                                console.print(f"[blue]Found CV repository at: {repo_path}[/blue]")
                                break
                except Exception as e:
                    console.print(f"[dim]Debug: Error in fallback path detection: {e}[/dim]")

            # Final fallback: current directory (structure test will likely
            # report what is missing).
            if repo_path is None:
                repo_path = "."
                console.print("[yellow]⚠️ Could not determine CV repository path, using current directory[/yellow]")

        # Step 3 (continued): Test repository structure.
        # Exceptions are caught so a structural crash is reported as a test
        # failure instead of aborting the whole pipeline.
        console.print(f"[dim]Debug: About to test repository structure with path: {repo_path}[/dim]")
        try:
            if not self.test_repository_structure(repo_path):
                success = False
        except Exception as e:
            console.print(f"[red]❌ Repository structure test failed with exception: {e}[/red]")
            success = False

        # Debug: Check what configuration is active before API test
        current_active = service.get_config_manager().get_active_config_name()
        console.print(f"[dim]Debug: Active config before API test: {current_active}[/dim]")

        # Step 4: Test YAML specs ingestion compatibility
        console.print(f"[blue]Testing YAML specs ingestion compatibility...[/blue]")
        ingestion_errors = self._test_esgvoc_specs_ingestion(project_name, Path(repo_path))
        if ingestion_errors:
            console.print(f"[red]❌ YAML specs ingestion test failed with {len(ingestion_errors)} errors:[/red]")
            for error in ingestion_errors:
                console.print(f" {error}")
            success = False
        else:
            console.print(f"[green]✅ YAML specs ingestion test passed![/green]")

        # Step 5: Test esgvoc API access
        if not self.test_esgvoc_api_access(project_name, repo_path):
            success = False

        # Summary
        if success:
            console.print(f"\n[bold green]🎉 All tests passed for project '{project_name}'![/bold green]")
        else:
            console.print(f"\n[bold red]❌ Some tests failed for project '{project_name}'[/bold red]")

        return success
1484
+
1485
    def restore_original_configuration(self):
        """Restore the original esgvoc configuration.

        Undoes everything the test setup changed, in order:
        switch back to the saved configuration, restore ``data_config_dir``,
        refresh the service state, delete the test data directories, and
        remove the temporary test configuration entry.

        Best-effort: any failure is reported as a warning, never raised,
        so cleanup can always be called from a ``finally`` block.
        """
        try:
            # Only restore if we actually captured an original configuration
            # at setup time.
            if self.config_manager and self.original_config_name:
                # Switch back to original configuration
                console.print(f"[blue]Restoring original configuration: {self.original_config_name}[/blue]")
                self.config_manager.switch_config(self.original_config_name)

                # CRITICAL: Restore the original data_config_dir
                # (switch_config alone does not reset it — TODO confirm).
                self.config_manager.data_config_dir = self.config_manager.data_dir / self.original_config_name
                self.config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
                console.print(f"[dim]Debug: Restored data_config_dir to: {self.config_manager.data_config_dir}[/dim]")

                # Reset service state so it reflects the restored configuration.
                service.current_state = service.get_state()

                # Clean up test_cv_temp data directories (repos and dbs)
                import shutil
                test_data_dir = self.config_manager.data_dir / self.test_config_name
                if test_data_dir.exists():
                    console.print(f"[blue]Cleaning up test data directories...[/blue]")
                    try:
                        shutil.rmtree(test_data_dir)
                        console.print(f"[green] ✓ Removed: {test_data_dir}[/green]")
                    except Exception as e:
                        # Leftover test data is harmless; warn and continue.
                        console.print(f"[yellow] Warning: Failed to clean test data directories: {e}[/yellow]")

                # Remove temporary test configuration
                configs = self.config_manager.list_configs()
                if self.test_config_name in configs:
                    console.print(f"[blue]Removing temporary test configuration: {self.test_config_name}[/blue]")
                    self.config_manager.remove_config(self.test_config_name)

                console.print(f"[green]✅ Restored original configuration: {self.original_config_name}[/green]")
        except Exception as e:
            # Swallow deliberately: restoration is best-effort cleanup.
            console.print(f"[yellow]⚠️ Error restoring original configuration: {e}[/yellow]")
1521
+
1522
+ def cleanup(self):
1523
+ """Cleanup resources and restore original configuration"""
1524
+ self.restore_original_configuration()
1525
+
1526
+
1527
def main():
    """Main CLI interface.

    Dispatches on ``sys.argv[1]``:
      list       - list available projects
      configure  - configure esgvoc for testing and synchronize CVs
      test       - run the complete test suite
      structure  - test repository structure only
      api        - test esgvoc API access only

    Exits with status 0 on success, 1 on failure or bad usage. The tester's
    temporary configuration is always cleaned up via the ``finally`` block.
    """
    if len(sys.argv) < 2:
        print("Usage: cv_tester.py <command> [options]")
        print("\nCommands:")
        print(" list - List available projects")
        print(" configure <project> - Configure esgvoc for testing")
        print(" test <project> - Run complete test suite")
        print(" structure <path> - Test repository structure only")
        print(" api <project> <path> - Test esgvoc API access only")
        print("\nEnvironment variables:")
        print(" TEST_BRANCH - Custom project branch to test")
        print(" REPO_URL - Custom repository URL")
        print(" UNIVERSE_BRANCH - Custom universe branch to test")
        print(" ESGVOC_LIBRARY_BRANCH - ESGVoc library branch (for info)")
        sys.exit(1)

    command = sys.argv[1]
    tester = CVTester()

    try:
        if command == "list":
            projects = tester.get_available_projects()
            console.print(f"[blue]Available projects ({len(projects)}):[/blue]")
            for project in projects:
                config = ServiceSettings._get_default_project_configs()[project]
                console.print(f" [cyan]{project}[/cyan] - {config['github_repo']} (branch: {config['branch']})")

        elif command == "configure":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_url = os.environ.get("REPO_URL")
            branch = os.environ.get("TEST_BRANCH")
            esgvoc_branch = os.environ.get("ESGVOC_LIBRARY_BRANCH")
            # Bug fix: UNIVERSE_BRANCH is documented in the usage text but was
            # never read; wire it through to the configuration step.
            universe_branch = os.environ.get("UNIVERSE_BRANCH")

            if tester.configure_for_testing(project_name, repo_url, branch, esgvoc_branch, universe_branch):
                if tester.synchronize_cvs():
                    console.print("[green]✅ Configuration complete[/green]")
                else:
                    sys.exit(1)
            else:
                sys.exit(1)

        elif command == "test":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_url = os.environ.get("REPO_URL")
            branch = os.environ.get("TEST_BRANCH")
            repo_path = sys.argv[3] if len(sys.argv) > 3 else "."
            esgvoc_branch = os.environ.get("ESGVOC_LIBRARY_BRANCH")
            # Bug fix: pass the documented UNIVERSE_BRANCH through to the
            # complete test pipeline (run_complete_test already accepts it).
            universe_branch = os.environ.get("UNIVERSE_BRANCH")

            success = tester.run_complete_test(
                project_name, repo_url, branch, repo_path, esgvoc_branch, universe_branch
            )
            sys.exit(0 if success else 1)

        elif command == "structure":
            repo_path = sys.argv[2] if len(sys.argv) > 2 else "."
            success = tester.test_repository_structure(repo_path)
            sys.exit(0 if success else 1)

        elif command == "api":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_path = sys.argv[3] if len(sys.argv) > 3 else "."
            success = tester.test_esgvoc_api_access(project_name, repo_path)
            sys.exit(0 if success else 1)

        else:
            console.print(f"[red]Error: Unknown command '{command}'[/red]")
            sys.exit(1)

    finally:
        # Always restore the original esgvoc configuration, even on sys.exit.
        tester.cleanup()
1608
+
1609
+
1610
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()