esgvoc 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +0 -6
- esgvoc/api/data_descriptors/__init__.py +8 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +2 -2
- esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/vertical_label.py +2 -2
- esgvoc/api/project_specs.py +48 -130
- esgvoc/api/projects.py +185 -66
- esgvoc/apps/drs/generator.py +103 -85
- esgvoc/apps/drs/validator.py +22 -38
- esgvoc/apps/jsg/json_schema_generator.py +255 -130
- esgvoc/apps/jsg/templates/template.jinja +249 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/cv_tester.py +1368 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/config.py +671 -86
- esgvoc/cli/drs.py +39 -21
- esgvoc/cli/main.py +2 -0
- esgvoc/cli/test_cv.py +257 -0
- esgvoc/core/constants.py +10 -7
- esgvoc/core/data_handler.py +24 -22
- esgvoc/core/db/connection.py +7 -0
- esgvoc/core/db/project_ingestion.py +34 -9
- esgvoc/core/db/universe_ingestion.py +1 -2
- esgvoc/core/service/configuration/setting.py +192 -21
- esgvoc/core/service/data_merger.py +1 -1
- esgvoc/core/service/state.py +18 -2
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -3
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/RECORD +41 -30
- esgvoc/apps/jsg/cmip6_template.json +0 -74
- esgvoc/apps/jsg/cmip6plus_template.json +0 -74
- /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,1368 @@
+#!/usr/bin/env python3
+"""
+CV Testing Application for ESGVoc
+
+This application allows testing of project CVs and Universe CVs with support for:
+- Custom repository URLs and branches via CLI options and environment variables
+- Universe branch override for testing against different WCRP-universe versions
+- Validation of repository structure and content
+- Testing YAML specification files (project_specs.yaml, drs_specs.yaml, catalog_spec.yaml, attr_specs.yaml)
+- Testing esgvoc API integration with CV repositories
+- Support for all available default projects: cmip6, cmip6plus, input4mip, obs4mip, cordex-cmip6
+- Rich CLI interface integrated with esgvoc CLI
+- Environment variable support for CI/CD integration
+- Automatic repository path detection for synchronized CVs
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from typing import List
+
+from pydantic import ValidationError
+from rich.console import Console
+
+import esgvoc.core.service as service
+from esgvoc.core.service.configuration.setting import (
+    ServiceSettings,
+)
+from esgvoc.core.service.state import StateService
+
+console = Console()
+
+
+def detect_project_name() -> str:
+    """
+    Try to auto-detect project name from current directory or environment.
+    Falls back to a reasonable default for testing.
+    """
+    # Check environment first
+    env_project = os.environ.get("PROJECT_NAME")
+    if env_project:
+        return env_project.lower()
+
+    # Try to detect from current directory name or path
+    cwd = Path.cwd()
+    dir_name = cwd.name.lower()
+
+    # Check if directory name matches any known project patterns
+    project_patterns = {
+        "obs4mips": ["obs4mips", "obs4mip"],
+        "input4mips": ["input4mips", "input4mip"],
+        "cmip6": ["cmip6"],
+        "cmip6plus": ["cmip6plus", "cmip6+"],
+        "cordex-cmip6": ["cordex-cmip6", "cordex", "cordexcmip6"],
+    }
+
+    for project, patterns in project_patterns.items():
+        if any(pattern in dir_name for pattern in patterns):
+            return project
+
+    # Check parent directories
+    for parent in cwd.parents:
+        parent_name = parent.name.lower()
+        for project, patterns in project_patterns.items():
+            if any(pattern in parent_name for pattern in patterns):
+                return project
+
+    # Default fallback
+    console.print("[yellow]⚠️ Could not auto-detect project, using 'obs4mip' as default[/yellow]")
+    return "obs4mip"
+
+
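+# CVTester drives the whole pipeline: it swaps the active esgvoc
+# configuration for a temporary one, synchronizes the configured CVs,
+# validates repository structure and YAML specs, and tests API access.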
+class CVTester:
+    """Main CV testing class"""
+
+    def __init__(self, debug_missing_terms: bool = True):
+        self.original_config_name = None
+        self.test_config_name = "test_cv_temp"
+        self.config_manager = None
+        self.debug_missing_terms = debug_missing_terms
+
+    def get_available_projects(self) -> List[str]:
+        """Get list of all available project CVs"""
+        return list(ServiceSettings.DEFAULT_PROJECT_CONFIGS.keys())
+
+    def configure_for_testing(
+        self,
+        project_name: str = None,
+        repo_url: str = None,
+        branch: str = None,
+        esgvoc_branch: str = None,
+        universe_branch: str = None,
+    ) -> bool:
+        """
+        Configure esgvoc with custom or default CV settings for testing
+
+        Args:
+            project_name: Name of the project to test (required)
+            repo_url: Custom repository URL (optional - uses default if not provided)
+            branch: Custom branch (optional - uses default if not provided)
+            esgvoc_branch: ESGVoc library branch (for info only)
+            universe_branch: Custom universe branch (optional - uses 'esgvoc' if not provided)
+
+        Returns:
+            bool: True if configuration was successful
+        """
+        try:
+            # Get config manager and store original active configuration
+            self.config_manager = service.get_config_manager()
+            self.original_config_name = self.config_manager.get_active_config_name()
+
+            console.print(f"[blue]Current active configuration: {self.original_config_name}[/blue]")
+
+            # Determine project configuration
+            if project_name not in self.get_available_projects():
+                available = ", ".join(self.get_available_projects())
+                console.print(f"[red]❌ Unknown project '{project_name}'. Available projects: {available}[/red]")
+                return False
+
+            # Use custom repo/branch if provided, otherwise use defaults
+            if repo_url or branch:
+                # Custom configuration
+                default_config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name]
+                project_config = {
+                    "project_name": project_name,
+                    "github_repo": repo_url or default_config["github_repo"],
+                    "branch": branch or default_config["branch"],
+                    "local_path": default_config["local_path"],
+                    "db_path": default_config["db_path"],
+                }
+                console.print(f"[blue]Using custom configuration for {project_name}:[/blue]")
+                console.print(f"  Repository: {project_config['github_repo']}")
+                console.print(f"  Branch: {project_config['branch']}")
+            else:
+                # Default configuration
+                project_config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name].copy()
+                console.print(f"[blue]Using default configuration for {project_name}[/blue]")
+
+            # Create temporary test configuration with universe and single project
+            test_config_data = {
+                "universe": {
+                    "github_repo": "https://github.com/WCRP-CMIP/WCRP-universe",
+                    "branch": universe_branch or "esgvoc",
+                    "local_path": "repos/WCRP-universe",
+                    "db_path": "dbs/universe.sqlite",
+                },
+                "projects": [project_config],
+            }
+
+            # Remove existing test config if it exists
+            configs = self.config_manager.list_configs()
+            if self.test_config_name in configs:
+                console.print(f"[yellow]Removing existing test configuration: {self.test_config_name}[/yellow]")
+                self.config_manager.remove_config(self.test_config_name)
+
+            # Create new test configuration
+            console.print(f"[blue]Creating temporary test configuration: {self.test_config_name}[/blue]")
+            console.print(f"[dim]Debug: Test config data projects: {test_config_data['projects']}[/dim]")
+            self.config_manager.add_config(self.test_config_name, test_config_data)
+
+            # Switch to test configuration
+            self.config_manager.switch_config(self.test_config_name)
+            console.print(f"[green]✅ Switched to test configuration: {self.test_config_name}[/green]")
+
+            # CRITICAL FIX: Update the data_config_dir after switching configurations
+            # This is the root cause - data_config_dir is set once and never updated
+            self.config_manager.data_config_dir = self.config_manager.data_dir / self.test_config_name
+            self.config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
+            console.print(f"[dim]Debug: Updated data_config_dir to: {self.config_manager.data_config_dir}[/dim]")
+
+            # Clear any potential caches in the config manager
+            if hasattr(self.config_manager, "_cached_config"):
+                self.config_manager._cached_config = None
+            if hasattr(self.config_manager, "cache"):
+                self.config_manager.cache.clear()
+
+            # Create fresh StateService with the updated configuration and directory
+            fresh_config = self.config_manager.get_config(self.test_config_name)
+            service.current_state = service.StateService(fresh_config)
+            console.print(f"[dim]Debug: Created fresh StateService for {self.test_config_name}[/dim]")
+
+            # Debug: Verify the fix worked
+            console.print(
+                f"[dim]Debug: StateService universe base_dir: {service.current_state.universe.base_dir}[/dim]"
+            )
+            console.print(
+                f"[dim]Debug: StateService universe local_path: {service.current_state.universe.local_path}[/dim]"
+            )
+
+            if esgvoc_branch:
+                console.print(f"[dim]Using esgvoc library from branch: {esgvoc_branch}[/dim]")
+
+            return True
+
+        except Exception as e:
+            console.print(f"[red]❌ Configuration failed: {e}[/red]")
+            import traceback
+
+            console.print(traceback.format_exc())
+            return False
+
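+    # synchronize_cvs() re-checks the active configuration before syncing: if
+    # another configuration slipped back in, it forces a switch to the
+    # temporary test configuration and rebuilds the StateService.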
+    def synchronize_cvs(self) -> bool:
+        """Synchronize/download the configured CVs"""
+        try:
+            console.print("[blue]Synchronizing CVs...[/blue]")
+
+            # Force refresh the state service to ensure it uses the correct configuration
+            service.current_state = service.get_state()
+
+            # Debug: Show what configuration the state service is using
+            config_manager = service.get_config_manager()
+            active_config = config_manager.get_active_config_name()
+            console.print(f"[dim]Debug: Active config during sync: {active_config}[/dim]")
+            console.print(f"[dim]Debug: Expected config: {self.test_config_name}[/dim]")
+            console.print(f"[dim]Debug: Data config dir during sync: {config_manager.data_config_dir}[/dim]")
+
+            if active_config != self.test_config_name:
+                console.print(
+                    f"[yellow]⚠️ Warning: Active config mismatch, forcing switch to {self.test_config_name}[/yellow]"
+                )
+                config_manager.switch_config(self.test_config_name)
+
+                # Update data_config_dir after forced switch
+                config_manager.data_config_dir = config_manager.data_dir / self.test_config_name
+                config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
+
+                # Clear caches again after forced switch
+                if hasattr(config_manager, "_cached_config"):
+                    config_manager._cached_config = None
+                if hasattr(config_manager, "cache"):
+                    config_manager.cache.clear()
+
+                # Create fresh StateService with correct configuration
+                fresh_config = config_manager.get_config(self.test_config_name)
+                service.current_state = StateService(fresh_config)
+                console.print(f"[dim]Debug: Recreated StateService for {self.test_config_name}[/dim]")
+
+            service.current_state.synchronize_all()
+            console.print("[green]✅ CVs synchronized successfully[/green]")
+            return True
+        except Exception as e:
+            console.print(f"[red]❌ CV synchronization failed: {e}[/red]")
+            import traceback
+
+            console.print(traceback.format_exc())
+            return False
+
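+    # Structure testing treats any directory containing a .jsonld file as a
+    # collection; directories with plain .json files but no context only
+    # produce warnings, not errors.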
+    def test_repository_structure(self, repo_path: str = ".") -> bool:
+        """
+        Test repository structure and file requirements
+
+        Args:
+            repo_path: Path to the repository to test (default: current directory)
+
+        Returns:
+            bool: True if all tests pass
+        """
+        console.print(f"[blue]🧪 Testing repository structure in: {repo_path}[/blue]")
+
+        repo_dir = Path(repo_path)
+        if not repo_dir.exists():
+            console.print(f"[red]❌ Repository path does not exist: {repo_path}[/red]")
+            return False
+
+        errors = []
+        warnings = []
+
+        # Get all directories
+        all_directories = [p for p in repo_dir.iterdir() if p.is_dir()]
+
+        # Identify collection directories by presence of .jsonld files
+        collection_directories = []
+        directories_with_json_but_no_jsonld = []
+
+        for directory in all_directories:
+            files_in_dir = list(directory.iterdir())
+            jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
+            json_files = [f for f in files_in_dir if f.name.endswith(".json") and not f.name.endswith(".jsonld")]
+
+            if len(jsonld_files) > 0:
+                collection_directories.append(directory)
+            elif len(json_files) > 0:
+                directories_with_json_but_no_jsonld.append(directory)
+
+        console.print(f"Found {len(collection_directories)} collection directories (with .jsonld files)")
+
+        # Warn about directories that might be missing context files
+        for directory in directories_with_json_but_no_jsonld:
+            warnings.append(f"⚠️ Directory '{directory.name}' has .json files but no .jsonld context")
+
+        # Test each collection directory
+        for directory in collection_directories:
+            console.print(f"📁 Testing collection: {directory.name}")
+            collection_errors = self._test_collection_directory(directory)
+            errors.extend(collection_errors)
+
+            # Add context validation warnings (only if collection passed basic validation)
+            if not collection_errors:
+                context_warnings = self._validate_context_usage(directory, directory.name)
+                for warning in context_warnings:
+                    console.print(f"  {warning}")
+
+        # Test YAML specification files if they exist
+        yaml_specs_errors = self._test_yaml_specs(repo_dir, collection_directories)
+        errors.extend(yaml_specs_errors)
+
+        # Display warnings
+        if warnings:
+            console.print(f"\n[yellow]Warnings ({len(warnings)}):[/yellow]")
+            for warning in warnings:
+                console.print(f"  {warning}")
+
+        # Summary
+        if errors:
+            console.print(f"\n[red]❌ Repository structure validation failed with {len(errors)} errors:[/red]")
+            for error in errors:
+                console.print(f"  {error}")
+            return False
+        else:
+            console.print("\n[green]✅ Repository structure validation passed![/green]")
+            console.print(f"✅ Validated {len(collection_directories)} collection directories")
+            return True
+
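+    # A collection passes when its .jsonld context defines "id", "type" and
+    # "@base", and every element file carries "id", "type" and "@context".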
+    def _test_collection_directory(self, directory: Path) -> List[str]:
+        """Test a single collection directory"""
+        errors = []
+
+        files_in_dir = list(directory.iterdir())
+        jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
+        other_files = [f for f in files_in_dir if not f.name.endswith(".jsonld")]
+
+        # Test directory structure
+        if len(jsonld_files) == 0:
+            errors.append(f"❌ {directory.name}: No .jsonld context file found")
+        elif len(jsonld_files) > 1:
+            console.print(f"  [yellow]⚠️ Multiple .jsonld files: {[f.name for f in jsonld_files]}[/yellow]")
+
+        if len(other_files) == 0:
+            errors.append(f"❌ {directory.name}: No element files found")
+
+        # Test JSONLD context files
+        for jsonld_file in jsonld_files:
+            try:
+                with open(jsonld_file, "r", encoding="utf-8") as f:
+                    jsonld_content = json.load(f)
+
+                if "@context" not in jsonld_content:
+                    errors.append(f"❌ {jsonld_file.name}: Missing '@context' field")
+                    continue
+
+                context = jsonld_content["@context"]
+                if not isinstance(context, dict):
+                    errors.append(f"❌ {jsonld_file.name}: '@context' must be a dictionary")
+                    continue
+
+                # Check required context fields
+                required_fields = ["id", "type", "@base"]
+                missing_fields = [field for field in required_fields if field not in context]
+                if missing_fields:
+                    errors.append(f"❌ {jsonld_file.name}: Missing required fields in @context: {missing_fields}")
+
+            except json.JSONDecodeError as e:
+                errors.append(f"❌ {jsonld_file.name}: Invalid JSON syntax - {e}")
+            except Exception as e:
+                errors.append(f"❌ {jsonld_file.name}: Error reading file - {e}")
+
+        # Test element files
+        json_element_files = [f for f in other_files if f.name.endswith(".json")]
+        for element_file in json_element_files:
+            try:
+                with open(element_file, "r", encoding="utf-8") as f:
+                    element_content = json.load(f)
+
+                required_fields = ["id", "type", "@context"]
+                missing_fields = [field for field in required_fields if field not in element_content]
+                if missing_fields:
+                    errors.append(f"❌ {element_file.name}: Missing required fields: {missing_fields}")
+
+            except json.JSONDecodeError as e:
+                errors.append(f"❌ {element_file.name}: Invalid JSON syntax - {e}")
+            except Exception as e:
+                errors.append(f"❌ {element_file.name}: Error reading file - {e}")
+
+        if not errors:
+            console.print(f"  [green]✅ Collection '{directory.name}' passed validation[/green]")
+
+        return errors
+
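+    # project_specs.yaml and drs_specs.yaml are required; catalog and
+    # attribute specs are optional. Every source_collection reference found in
+    # the specs must match one of the collection directories.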
+    def _test_yaml_specs(self, repo_dir: Path, collection_directories: List[Path]) -> List[str]:
+        """Test YAML specification files (project_specs.yaml, drs_specs.yaml, catalog_spec.yaml, attr_specs.yaml)"""
+        errors = []
+
+        # Import constants and YAML handling
+        try:
+            import yaml
+            from esgvoc.core.constants import (
+                PROJECT_SPECS_FILENAME,
+                DRS_SPECS_FILENAME,
+                CATALOG_SPECS_FILENAME,
+                ATTRIBUTES_SPECS_FILENAME
+            )
+        except ImportError as e:
+            errors.append(f"❌ Missing required dependencies: {e}")
+            return errors
+
+        # Get existing collections for validation
+        existing_collections = {d.name for d in collection_directories}
+        source_collections = set()
+        files_tested = 0
+
+        # Test project_specs.yaml
+        project_specs_file = repo_dir / PROJECT_SPECS_FILENAME
+        if project_specs_file.exists():
+            console.print(f"📄 Testing {PROJECT_SPECS_FILENAME}...")
+            try:
+                with open(project_specs_file, "r", encoding="utf-8") as f:
+                    project_specs = yaml.safe_load(f)
+                console.print(f"  [green]✅ {PROJECT_SPECS_FILENAME} parsed successfully[/green]")
+                files_tested += 1
+            except yaml.YAMLError as e:
+                errors.append(f"❌ {PROJECT_SPECS_FILENAME}: Invalid YAML syntax - {e}")
+            except Exception as e:
+                errors.append(f"❌ Error reading {PROJECT_SPECS_FILENAME}: {e}")
+        else:
+            errors.append(f"❌ Required file {PROJECT_SPECS_FILENAME} not found")
+
+        # Test drs_specs.yaml
+        drs_specs_file = repo_dir / DRS_SPECS_FILENAME
+        if drs_specs_file.exists():
+            console.print(f"📄 Testing {DRS_SPECS_FILENAME}...")
+            try:
+                with open(drs_specs_file, "r", encoding="utf-8") as f:
+                    drs_specs = yaml.safe_load(f)
+
+                # Extract collection references from DRS specs
+                for drs_name, drs_spec in drs_specs.items():
+                    if isinstance(drs_spec, dict) and "parts" in drs_spec:
+                        for part in drs_spec["parts"]:
+                            if isinstance(part, dict):
+                                # Handle both old format (collection_id) and new format (source_collection)
+                                collection_ref = part.get("collection_id") or part.get("source_collection")
+                                if collection_ref:
+                                    source_collections.add(collection_ref)
+
+                console.print(f"  [green]✅ {DRS_SPECS_FILENAME} parsed successfully[/green]")
+                files_tested += 1
+            except yaml.YAMLError as e:
+                errors.append(f"❌ {DRS_SPECS_FILENAME}: Invalid YAML syntax - {e}")
+            except Exception as e:
+                errors.append(f"❌ Error reading {DRS_SPECS_FILENAME}: {e}")
+        else:
+            errors.append(f"❌ Required file {DRS_SPECS_FILENAME} not found")
+
+        # Test catalog_spec.yaml (optional)
+        catalog_specs_file = repo_dir / CATALOG_SPECS_FILENAME
+        if catalog_specs_file.exists():
+            console.print(f"📄 Testing {CATALOG_SPECS_FILENAME}...")
+            try:
+                with open(catalog_specs_file, "r", encoding="utf-8") as f:
+                    catalog_specs = yaml.safe_load(f)
+
+                # Extract collection references from catalog specs
+                if isinstance(catalog_specs, dict):
+                    # Check dataset_properties and file_properties
+                    for prop_type in ["dataset_properties", "file_properties"]:
+                        if prop_type in catalog_specs and isinstance(catalog_specs[prop_type], list):
+                            for prop in catalog_specs[prop_type]:
+                                if isinstance(prop, dict) and "source_collection" in prop:
+                                    source_collections.add(prop["source_collection"])
+
+                console.print(f"  [green]✅ {CATALOG_SPECS_FILENAME} parsed successfully[/green]")
+                files_tested += 1
+            except yaml.YAMLError as e:
+                errors.append(f"❌ {CATALOG_SPECS_FILENAME}: Invalid YAML syntax - {e}")
+            except Exception as e:
+                errors.append(f"❌ Error reading {CATALOG_SPECS_FILENAME}: {e}")
+        else:
+            console.print(f"  [yellow]⚠️ Optional file {CATALOG_SPECS_FILENAME} not found[/yellow]")
+
+        # Test attr_specs.yaml (currently not ingested by esgvoc, but test for syntax)
+        attr_specs_file = repo_dir / ATTRIBUTES_SPECS_FILENAME
+        if attr_specs_file.exists():
+            console.print(f"📄 Testing {ATTRIBUTES_SPECS_FILENAME} (syntax only - not ingested by esgvoc)...")
+            try:
+                with open(attr_specs_file, "r", encoding="utf-8") as f:
+                    attr_specs = yaml.safe_load(f)
+
+                # Extract collection references from attribute specs if they exist
+                if isinstance(attr_specs, dict):
+                    # Check for global_attributes_specs or similar structures
+                    if "specs" in attr_specs:
+                        specs = attr_specs["specs"]
+                        if isinstance(specs, dict):
+                            for attr_name, attr_spec in specs.items():
+                                if isinstance(attr_spec, dict) and "source_collection" in attr_spec:
+                                    source_collections.add(attr_spec["source_collection"])
+
+                console.print(f"  [green]✅ {ATTRIBUTES_SPECS_FILENAME} parsed successfully[/green]")
+                console.print(f"  [yellow]⚠️ Note: {ATTRIBUTES_SPECS_FILENAME} is not currently ingested by esgvoc[/yellow]")
+                files_tested += 1
+            except yaml.YAMLError as e:
+                errors.append(f"❌ {ATTRIBUTES_SPECS_FILENAME}: Invalid YAML syntax - {e}")
+            except Exception as e:
+                errors.append(f"❌ Error reading {ATTRIBUTES_SPECS_FILENAME}: {e}")
+        else:
+            console.print(f"  [yellow]⚠️ Optional file {ATTRIBUTES_SPECS_FILENAME} not found[/yellow]")
+
+        # Validate collection references
+        if source_collections:
+            console.print(f"  Found {len(source_collections)} source_collection references")
+
+            for collection in source_collections:
+                if collection not in existing_collections:
+                    errors.append(f"❌ YAML specs reference non-existent collection: '{collection}'")
+                else:
+                    console.print(f"  [green]✅ Reference '{collection}' exists[/green]")
+        else:
+            console.print("  [yellow]⚠️ No collection references found in YAML specs[/yellow]")
+
+        if files_tested == 0:
+            errors.append("❌ No YAML specification files found")
+        else:
+            console.print(f"  [blue]📊 Successfully tested {files_tested} YAML specification files[/blue]")
+
+        return errors
+
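+    # Missing-term debugging checks three places in turn: the project
+    # repository file, the universe term resolved through DataMerger, and the
+    # esgvoc API for both the project and the universe.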
+    def _debug_missing_term(self, project_name: str, collection_name: str, term_id: str, repo_path: str = "."):
+        """
+        Provide detailed debugging information for a missing term.
+
+        Args:
+            project_name: Name of the project
+            collection_name: Name of the collection
+            term_id: ID of the missing term
+            repo_path: Path to the repository
+        """
+        console.print(f"\n[bold yellow]🔍 Debugging missing term: {term_id} in {collection_name}[/bold yellow]")
+
+        repo_dir = Path(repo_path)
+        collection_dir = repo_dir / collection_name
+
+        # 1. Check if term exists in project repository
+        term_file = collection_dir / f"{term_id}.json"
+        console.print(f"\n[blue]📁 Project Repository ({project_name}):[/blue]")
+
+        if term_file.exists():
+            try:
+                with open(term_file, "r", encoding="utf-8") as f:
+                    term_content = json.load(f)
+                console.print(f"  [green]✅ Term found in project: {term_file}[/green]")
+                console.print("  [dim]Content:[/dim]")
+                formatted_json = json.dumps(term_content, indent=2, ensure_ascii=False)
+                for line in formatted_json.split("\n"):
+                    console.print(f"    {line}")
+            except Exception as e:
+                console.print(f"  [red]❌ Error reading term file: {e}[/red]")
+        else:
+            console.print(f"  [red]❌ Term not found in project: {term_file}[/red]")
+
+            # Try to find the term by searching for files that contain this term_id
+            console.print(f"  [dim]Searching for files containing term ID '{term_id}'...[/dim]")
+            try:
+                for json_file in collection_dir.glob("*.json"):
+                    if json_file.name.endswith(".jsonld"):
+                        continue
+                    try:
+                        with open(json_file, "r", encoding="utf-8") as f:
+                            content = json.load(f)
+                        if content.get("id") == term_id:
+                            console.print(f"  [yellow]📄 Found term ID '{term_id}' in file: {json_file.name}[/yellow]")
+                            console.print(f"  [dim]Note: Filename '{json_file.name}' ≠ expected '{term_id}.json'[/dim]")
+                            console.print("  [dim]Content:[/dim]")
+                            formatted_json = json.dumps(content, indent=2, ensure_ascii=False)
+                            for line in formatted_json.split("\n"):
+                                console.print(f"    {line}")
+                            break
+                    except Exception:
+                        continue
+                else:
+                    console.print(f"  [dim]No file found containing term ID '{term_id}'[/dim]")
+            except Exception as e:
+                console.print(f"  [dim]Error searching for term: {e}[/dim]")
+
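+        # The universe lookup below mirrors project ingestion: a project term
+        # links to its universe counterpart via @id (or the context @base),
+        # and DataMerger merges the linked JSON from the local universe clone.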
+        # 2. Check if term exists in universe (using DataMerger to resolve links)
+        try:
+            current_state = service.get_state()
+            if hasattr(current_state, "universe") and current_state.universe.local_path:
+                universe_dir = Path(current_state.universe.local_path)
+
+                console.print("\n[blue]🌌 Universe Repository (resolved via DataMerger):[/blue]")
+
+                # First, try to use DataMerger to resolve the universe term if project term exists
+                resolved_universe_term = None
+                universe_term_path = None
+                project_term_content = None
+
+                if term_file.exists():
+                    try:
+                        # First, read the project term to see what it links to
+                        with open(term_file, "r", encoding="utf-8") as f:
+                            project_term_content = json.load(f)
+
+                        from esgvoc.core.data_handler import JsonLdResource
+                        from esgvoc.core.service.data_merger import DataMerger
+
+                        # Use DataMerger to resolve the universe term like in project_ingestion.py
+                        locally_avail = {
+                            "https://espri-mod.github.io/mip-cmor-tables": str(current_state.universe.local_path)
+                        }
+
+                        console.print("  [dim]Attempting DataMerger resolution...[/dim]")
+
+                        # Check if project term has an @id link
+                        if "@id" in project_term_content:
+                            console.print(f"  [dim]Project term @id: {project_term_content['@id']}[/dim]")
+
+                            # Calculate expected universe path
+                            if "https://espri-mod.github.io/mip-cmor-tables" in project_term_content["@id"]:
+                                universe_relative_path = project_term_content["@id"].replace(
+                                    "https://espri-mod.github.io/mip-cmor-tables/", ""
+                                )
+                                if not universe_relative_path.endswith(".json"):
+                                    universe_relative_path += ".json"
+                                universe_term_path = universe_dir / universe_relative_path
+                                console.print(f"  [dim]Expected universe path: {universe_term_path}[/dim]")
+                        else:
+                            console.print("  [dim]Project term has no @id link to universe[/dim]")
+                            # Even without @id, try to infer the universe path from context base
+                            try:
+                                # Read the context file to get the base
+                                context_file = term_file.parent / "000_context.jsonld"
+                                if context_file.exists():
+                                    with open(context_file, "r", encoding="utf-8") as f:
+                                        context_content = json.load(f)
+
+                                    base_url = context_content.get("@context", {}).get("@base", "")
+                                    if base_url and "https://espri-mod.github.io/mip-cmor-tables" in base_url:
+                                        universe_relative_path = (
+                                            base_url.replace("https://espri-mod.github.io/mip-cmor-tables/", "")
+                                            + f"{term_id}.json"
+                                        )
+                                        universe_term_path = universe_dir / universe_relative_path
+                                        console.print(f"  [dim]Inferred from context @base: {universe_term_path}[/dim]")
+                            except Exception as e:
+                                console.print(f"  [dim]Could not infer universe path from context: {e}[/dim]")
+
+                        # Debug: Check what the JsonLdResource expansion produces
+                        json_resource = JsonLdResource(uri=str(term_file))
+                        console.print(f"  [dim]JSON-LD expanded form: {json_resource.expanded}[/dim]")
+
+                        merger_result = DataMerger(
+                            data=json_resource,
+                            locally_available=locally_avail,
+                        ).merge_linked_json()
+
+                        if merger_result and len(merger_result) > 1:
+                            # If we have more than one result, the last one is the fully merged term
+                            resolved_universe_term = merger_result[-1]
+
+                            console.print("  [green]✅ Term resolved via DataMerger (merged from universe)[/green]")
+                            if universe_term_path:
+                                console.print(f"  [dim]Resolved universe path: {universe_term_path}[/dim]")
+                            console.print(
+                                f"  [dim]Universe file exists: {universe_term_path.exists() if universe_term_path else 'N/A'}[/dim]"
+                            )
+                            console.print("  [dim]Merged content:[/dim]")
+                            formatted_json = json.dumps(resolved_universe_term, indent=2, ensure_ascii=False)
+                            for line in formatted_json.split("\n"):
+                                console.print(f"    {line}")
+                        else:
+                            console.print(
+                                f"  [yellow]⚠️ No universe term linked from project term (merge result length: {len(merger_result) if merger_result else 0})[/yellow]"
+                            )
+
+                    except Exception as e:
+                        console.print(f"  [red]❌ Error using DataMerger to resolve universe term: {e}[/red]")
+                        # Still show what the project term was trying to link to
+                        if project_term_content and "@id" in project_term_content:
+                            console.print(
+                                f"  [dim]Project term was trying to link to: {project_term_content['@id']}[/dim]"
+                            )
+                            universe_relative_path = project_term_content["@id"].replace(
+                                "https://espri-mod.github.io/mip-cmor-tables/", ""
+                            )
+                            if not universe_relative_path.endswith(".json"):
+                                universe_relative_path += ".json"
+                            universe_term_path = universe_dir / universe_relative_path
+                            console.print(
+                                f"  [dim]Expected universe file: {universe_term_path} (exists: {universe_term_path.exists() if universe_term_path else False})[/dim]"
+                            )
+
+                # Fallback: also check direct universe path and show resolved universe file if it was calculated
+                if not resolved_universe_term:
+                    # Show the resolved path from DataMerger if we have it
+                    if universe_term_path and universe_term_path.exists():
+                        try:
+                            with open(universe_term_path, "r", encoding="utf-8") as f:
+                                universe_term_content = json.load(f)
+                            console.print(
+                                f"  [green]✅ Universe file found at resolved path: {universe_term_path}[/green]"
+                            )
+                            console.print("  [dim]Content:[/dim]")
+                            formatted_json = json.dumps(universe_term_content, indent=2, ensure_ascii=False)
+                            for line in formatted_json.split("\n"):
+                                console.print(f"    {line}")
+                        except Exception as e:
+                            console.print(f"  [red]❌ Error reading resolved universe file: {e}[/red]")
+                    else:
+                        # Show detailed path info - don't try direct collection path since it's wrong
+                        console.print("  [red]❌ Term not found in universe:[/red]")
+                        if universe_term_path:
+                            console.print(
+                                f"  [dim]• DataMerger resolved path: {universe_term_path} (exists: {universe_term_path.exists()})[/dim]"
+                            )
+
+                        # Try direct collection-based path as fallback (but note this may be incorrect for project collections vs universe structure)
+                        universe_collection_dir = universe_dir / collection_name
+                        universe_term_file = universe_collection_dir / f"{term_id}.json"
+                        console.print(
+                            f"  [dim]• Direct collection path: {universe_term_file} (exists: {universe_term_file.exists()})[/dim]"
+                        )
+
+                        # Try to find similar files in the universe to help debugging
+                        try:
+                            if universe_term_path:
+                                parent_dir = universe_term_path.parent
+                                if parent_dir.exists():
+                                    similar_files = [
+                                        f.name
+                                        for f in parent_dir.iterdir()
+                                        if f.is_file() and f.suffix == ".json" and term_id.lower() in f.name.lower()
+                                    ]
+                                    if similar_files:
+                                        console.print(
+                                            f"  [dim]• Similar files in {parent_dir.name}: {similar_files}[/dim]"
+                                        )
+
+                                    # Also check if there are files with different casing
+                                    all_files = [
+                                        f.name for f in parent_dir.iterdir() if f.is_file() and f.suffix == ".json"
+                                    ]
+                                    casing_matches = [f for f in all_files if f.lower() == f"{term_id.lower()}.json"]
+                                    if casing_matches and casing_matches[0] != f"{term_id}.json":
+                                        console.print(
+                                            f"  [dim]• Case mismatch found: {casing_matches[0]} vs {term_id}.json[/dim]"
+                                        )
+                        except Exception:
+                            pass
+            else:
+                console.print("  [yellow]⚠️ Universe path not available[/yellow]")
+        except Exception as e:
+            console.print(f"  [red]❌ Error accessing universe: {e}[/red]")
+
+        # 3. Try to query the term via esgvoc API
+        console.print("\n[blue]🔗 ESGVoc API Query:[/blue]")
+        try:
+            import esgvoc.api as ev
+
+            # Try to get the term from project
+            try:
+                project_terms = ev.get_all_terms_in_collection(project_name, collection_name)
+                matching_terms = [term for term in project_terms if term.id == term_id]
+                if matching_terms:
+                    term = matching_terms[0]
+                    console.print("  [green]✅ Term found in esgvoc project API[/green]")
+                    console.print(f"    ID: {term.id}")
+                    console.print(f"    Type: {term.type}")
+                    console.print(f"    Label: {getattr(term, 'label', 'N/A')}")
+                    console.print(f"    Description: {getattr(term, 'description', 'N/A')[:100]}...")
+                else:
+                    console.print("  [red]❌ Term not found in esgvoc project API[/red]")
+            except Exception as e:
+                console.print(f"  [red]❌ Error querying project API: {e}[/red]")
+
+            # Try to get the term from universe (if available)
+            try:
+                universe_terms = ev.get_all_terms_in_collection("universe", collection_name)
+                matching_universe_terms = [term for term in universe_terms if term.id == term_id]
+                if matching_universe_terms:
+                    term = matching_universe_terms[0]
+                    console.print("  [green]✅ Term found in esgvoc universe API[/green]")
+                    console.print(f"    ID: {term.id}")
+                    console.print(f"    Type: {term.type}")
+                    console.print(f"    Label: {getattr(term, 'label', 'N/A')}")
+                    console.print(f"    Description: {getattr(term, 'description', 'N/A')[:100]}...")
+                else:
+                    console.print("  [red]❌ Term not found in esgvoc universe API[/red]")
+            except Exception as e:
+                console.print(f"  [red]❌ Error querying universe API: {e}[/red]")
+
+        except Exception as e:
+            console.print(f"  [red]❌ Error importing esgvoc API: {e}[/red]")
+
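+    # Context-usage validation is warning-only: it flags context keys no term
+    # uses, term properties lacking a context shortcut, and files whose "id"
+    # does not match the filename.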
+    def _validate_context_usage(self, collection_dir: Path, collection_name: str) -> list:
+        """
+        Validate context usage and detect potential issues.
+
+        Returns:
+            list: List of warning messages
+        """
+        warnings = []
+
+        try:
+            context_file = collection_dir / "000_context.jsonld"
+            if not context_file.exists():
+                return warnings
+
+            # Read context
+            with open(context_file, "r", encoding="utf-8") as f:
+                context_data = json.load(f)
+
+            context_mappings = context_data.get("@context", {})
+            if not isinstance(context_mappings, dict):
+                return warnings
+
+            # Get all JSON term files
+            term_files = [f for f in collection_dir.glob("*.json") if not f.name.endswith(".jsonld")]
+
+            # Track context key usage
+            context_keys_used = set()
+            term_properties_used = set()
+            terms_using_base_expansion = []
+
+            for term_file in term_files:
+                try:
+                    with open(term_file, "r", encoding="utf-8") as f:
+                        term_content = json.load(f)
+
+                    # Check what properties and values are used in the term
+                    for key, value in term_content.items():
+                        if key not in ["@context", "@id", "@type"]:
+                            term_properties_used.add(key)
+
+                            # Check if this property has a shortcut in context
+                            if key in context_mappings:
+                                context_keys_used.add(key)
+
+                            # Check if property values use context shortcuts
+                            # For example: "type": "source" where context has "source": "https://..."
+                            if isinstance(value, str) and value in context_mappings:
+                                context_keys_used.add(value)
+
+                    # Check if term relies on @base expansion (has simple id but no explicit @id)
+                    term_id = term_content.get("id", term_file.stem)
+                    if "id" in term_content and "@id" not in term_content and "@base" in context_mappings:
+                        terms_using_base_expansion.append({"file": term_file.name, "id": term_id})
+
+                except Exception:
+                    continue
+
+            # Check for unused context keys (excluding standard JSON-LD keys)
+            standard_keys = {"@base", "@vocab", "@language", "@version", "id", "type"}
+            defined_keys = set(context_mappings.keys()) - standard_keys
+            unused_keys = defined_keys - context_keys_used
+
+            if unused_keys:
+                warnings.append(f"⚠️ Context defines unused keys in '{collection_name}': {sorted(unused_keys)}")
+
+            # Check for properties without shortcuts
+            properties_without_shortcuts = term_properties_used - context_keys_used - {"id", "type"}
+            if properties_without_shortcuts:
+                warnings.append(
+                    f"⚠️ Properties used without context shortcuts in '{collection_name}': {sorted(properties_without_shortcuts)}"
+                )
+
+            # Check for filename/ID mismatches
+            filename_id_mismatches = []
+            for term_file in term_files:
+                try:
+                    with open(term_file, "r", encoding="utf-8") as f:
+                        term_content = json.load(f)
+
+                    expected_id = term_file.stem  # filename without .json extension
+                    actual_id = term_content.get("id")
+
+                    if actual_id and actual_id != expected_id:
+                        filename_id_mismatches.append(
+                            {"file": term_file.name, "expected_id": expected_id, "actual_id": actual_id}
+                        )
+                except Exception:
+                    continue
+
+            if filename_id_mismatches:
+                warnings.append(f"⚠️ Filename/ID mismatches in '{collection_name}':")
+                for mismatch in filename_id_mismatches[:5]:  # Show first 5
+                    warnings.append(
+                        f"  • {mismatch['file']}: id='{mismatch['actual_id']}' (expected '{mismatch['expected_id']}')"
+                    )
+                if len(filename_id_mismatches) > 5:
+                    warnings.append(f"  • ... and {len(filename_id_mismatches) - 5} more mismatches")
+
+            # Base expansion is normal JSON-LD behavior - only report if there might be issues
+            # For now, we'll skip this since @base expansion is the expected pattern
+
+            # Only warn about @base vs shortcuts if they're used for the same purpose
+            # @base is for term identity URLs, shortcuts are for property/type values - this is normal
+            # We could add more sophisticated conflict detection here if needed
+
+        except Exception as e:
+            warnings.append(f"⚠️ Error validating context usage in '{collection_name}': {e}")
+
+        return warnings
+
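+    # Universe validation reuses _validate_context_usage() on every universe
+    # collection; warnings are reported but never fail the run.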
+    def _validate_universe_warnings(self) -> bool:
+        """
+        Validate universe repository for potential issues and display warnings.
+
+        Returns:
+            bool: True if universe validation completed (warnings don't fail the test)
+        """
+        try:
+            current_state = service.get_state()
+            if not hasattr(current_state, "universe") or not current_state.universe.local_path:
+                console.print("[dim]⚠️ Universe path not available for validation[/dim]")
+                return True
+
+            universe_dir = Path(current_state.universe.local_path)
+            if not universe_dir.exists():
+                console.print(f"[dim]⚠️ Universe directory not found: {universe_dir}[/dim]")
+                return True
+
+            console.print(f"[blue]🌌 Validating Universe Repository: {universe_dir.name}[/blue]")
+
+            # Find universe collections (directories with JSON files)
+            universe_collections = []
+            for item in universe_dir.iterdir():
+                if item.is_dir():
+                    json_files = list(item.glob("*.json"))
+                    jsonld_files = [f for f in json_files if f.name.endswith(".jsonld")]
+                    regular_json_files = [f for f in json_files if not f.name.endswith(".jsonld")]
+
+                    if regular_json_files:
+                        universe_collections.append(item)
+
+            console.print(f"Found {len(universe_collections)} universe collections to validate")
+
+            total_warnings = 0
+            for collection_dir in universe_collections:
+                warnings = self._validate_context_usage(collection_dir, collection_dir.name)
+                if warnings:
+                    console.print(f"📁 Universe collection '{collection_dir.name}':")
+                    for warning in warnings:
+                        console.print(f"  {warning}")
+                    total_warnings += 1
+
+            if total_warnings == 0:
+                console.print("✅ No validation warnings found in universe")
+            else:
+                console.print(f"⚠️ Found {total_warnings} validation warnings in universe")
+
+            console.print("")  # Add spacing before project validation
+            return True
+
+        except Exception as e:
+            console.print(f"[red]❌ Error validating universe: {e}[/red]")
+            return True  # Don't fail the test for universe validation errors
+
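+    # API testing compares the on-disk repository with what esgvoc serves: the
+    # project must be registered, every collection with a .jsonld context must
+    # be listed, and every element id must come back from the API.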
+    def test_esgvoc_api_access(self, project_name: str, repo_path: str = ".") -> bool:
+        """
+        Test that all repository collections and elements are queryable via esgvoc API
+
+        Args:
+            project_name: Name of the project being tested
+            repo_path: Path to the repository (default: current directory)
+
+        Returns:
+            bool: True if all API tests pass
+        """
+        console.print(f"[blue]🔍 Testing esgvoc API access for project: {project_name}[/blue]")
+
+        try:
+            import esgvoc.api as ev
+        except ImportError as e:
+            console.print(f"[red]❌ Cannot import esgvoc.api: {e}[/red]")
+            return False
+
+        repo_dir = Path(repo_path)
+        errors = []
+
+        # Test 1: Verify project exists in esgvoc
+        try:
+            projects = ev.get_all_projects()
+            if project_name not in projects:
+                errors.append(f"❌ Project '{project_name}' not found in esgvoc. Available: {projects}")
+                return False
+            console.print(f"[green]✅ Project '{project_name}' found in esgvoc[/green]")
+        except Exception as e:
+            errors.append(f"❌ Failed to get projects from esgvoc: {e}")
+            return False
+
+        # Get repository collections
+        repo_collections = []
+        all_directories = [p for p in repo_dir.iterdir() if p.is_dir()]
+        for directory in all_directories:
+            files_in_dir = list(directory.iterdir())
+            jsonld_files = [f for f in files_in_dir if f.name.endswith(".jsonld")]
+            if len(jsonld_files) > 0:
+                repo_collections.append(directory.name)
+
+        # Test 2: Get collections from esgvoc
+        try:
+            # Debug: Check active configuration during API test
+            current_active = service.get_config_manager().get_active_config_name()
+            console.print(f"[dim]Debug: Active config during API test: {current_active}[/dim]")
+
+            esgvoc_collections = ev.get_all_collections_in_project(project_name)
+            console.print(
+                f"Found {len(esgvoc_collections)} collections in esgvoc, {len(repo_collections)} in repository"
+            )
+        except ValidationError as e:
+            # Enhanced error reporting for Pydantic validation errors
+            error_msg = f"❌ Validation error while processing collections for project '{project_name}'"
+
+            # Try to extract more context from the error
+            if hasattr(e, "errors") and e.errors():
+                for error in e.errors():
+                    if "input" in error and "ctx" in error:
+                        error_msg += f"\n  • Invalid value: '{error['input']}'"
+                        if "enum_values" in error["ctx"]:
+                            error_msg += f"\n  • Expected one of: {error['ctx']['enum_values']}"
+                    if error.get("type") == "enum":
+                        error_msg += f"\n  • Field: {error.get('loc', 'unknown')}"
+
+            errors.append(error_msg)
+            console.print(f"[red]{error_msg}[/red]")
+            console.print(f"[dim]Full error details: {str(e)}[/dim]")
+            return False
+        except ValueError as e:
+            # Enhanced error reporting for database validation issues
+            error_str = str(e)
+            if "collections with empty term_kind" in error_str:
+                console.print(f"[red]❌ Database validation error for project '{project_name}':[/red]")
+                console.print(f"[red]{error_str}[/red]")
+                errors.append(f"❌ Invalid termkind values in database for project '{project_name}'")
+            else:
+                errors.append(f"❌ Failed to get collections from esgvoc: {e}")
+                console.print(f"[red]API Error Details: {e}[/red]")
+            return False
+        except Exception as e:
+            errors.append(f"❌ Failed to get collections from esgvoc: {e}")
+            console.print(f"[red]API Error Details: {e}[/red]")
+            return False
+
# Test 3: Verify each repository collection is queryable
|
|
1046
|
+
missing_in_esgvoc = []
|
|
1047
|
+
for collection_name in repo_collections:
|
|
1048
|
+
if collection_name not in esgvoc_collections:
|
|
1049
|
+
missing_in_esgvoc.append(collection_name)
|
|
1050
|
+
else:
|
|
1051
|
+
console.print(f" [green]✅ Collection '{collection_name}' found in esgvoc[/green]")
|
|
1052
|
+
|
|
1053
|
+
if missing_in_esgvoc:
|
|
1054
|
+
errors.append(f"❌ Collections in repository but not in esgvoc: {missing_in_esgvoc}")
|
|
1055
|
+
|
|
1056
|
+
        # Test 4: Test elements in each collection
        for collection_name in repo_collections:
            if collection_name in esgvoc_collections:
                console.print(f"📂 Testing elements in collection: {collection_name}")

                # Get repository elements
                collection_dir = repo_dir / collection_name
                json_files = [
                    f for f in collection_dir.iterdir() if f.name.endswith(".json") and not f.name.endswith(".jsonld")
                ]

                repo_elements = []
                repo_element_sources = {}  # Track where each ID comes from
                for json_file in json_files:
                    try:
                        with open(json_file, "r", encoding="utf-8") as f:
                            content = json.load(f)
                        element_id = content.get("id", json_file.stem)
                        repo_elements.append(element_id)
                        repo_element_sources[element_id] = {"file": json_file.name, "from_id_field": "id" in content}
                    except (OSError, ValueError):
                        # Unreadable file or invalid JSON: fall back to the file name as the element ID
                        element_id = json_file.stem
                        repo_elements.append(element_id)
                        repo_element_sources[element_id] = {"file": json_file.name, "from_id_field": False}

                # Get esgvoc elements
                try:
                    esgvoc_terms = ev.get_all_terms_in_collection(project_name, collection_name)
                    esgvoc_element_ids = [term.id for term in esgvoc_terms]

                    console.print(f"  Repository: {len(repo_elements)}, ESGVoc: {len(esgvoc_element_ids)} elements")

                    missing_elements = [elem for elem in repo_elements if elem not in esgvoc_element_ids]
                    if missing_elements:
                        errors.append(
                            f"❌ Collection '{collection_name}': Elements missing from esgvoc: {missing_elements}"
                        )

                        if self.debug_missing_terms:
                            # Show where each missing ID came from ("id" field vs. filename)
                            console.print("  [dim]Missing elements and their sources:[/dim]")
                            for elem in missing_elements:
                                source_info = repo_element_sources.get(
                                    elem, {"file": "unknown", "from_id_field": False}
                                )
                                id_source = "id field" if source_info["from_id_field"] else "filename"
                                console.print(f"  [dim]• {elem} (from {source_info['file']} {id_source})[/dim]")

                            # Detailed analysis of each missing element
                            console.print(
                                f"\n[bold red]📋 Detailed analysis of missing elements in '{collection_name}':[/bold red]"
                            )
                            for missing_element in missing_elements:
                                self._debug_missing_term(project_name, collection_name, missing_element, repo_path)
                        else:
                            console.print("[dim]💡 Use --debug-terms for detailed analysis of missing elements[/dim]")
                    else:
                        console.print(f"  [green]✅ All elements in '{collection_name}' are queryable[/green]")

                except Exception as e:
                    errors.append(f"❌ Failed to get terms from collection '{collection_name}': {e}")

        # Test 5: General API functions
        try:
            all_terms = ev.get_all_terms_in_all_projects()
            console.print(f"[blue]📊 ESGVoc API returned {len(all_terms)} total terms across all projects[/blue]")
        except Exception as e:
            errors.append(f"❌ Failed to get all terms from esgvoc: {e}")

        # Summary
        if errors:
            console.print(f"\n[red]❌ ESGVoc API validation failed with {len(errors)} errors:[/red]")
            for error in errors:
                console.print(f"  {error}")
            return False
        else:
            console.print("\n[green]✅ ESGVoc API validation passed![/green]")
            console.print(f"✅ Validated {len(repo_collections)} collections")
            console.print("✅ All repository elements accessible through esgvoc API")
            return True

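    # A minimal sketch of the ID resolution used by Test 4 above (the file
    # names and "id" values are hypothetical, not terms from a real CV):
    #
    #     content = {"id": "tas"}                   # tas.json carries an "id" field
    #     content.get("id", Path("tas.json").stem)  # -> "tas" (from the id field)
    #     content = {}                              # pr.json has no "id" field
    #     content.get("id", Path("pr.json").stem)   # -> "pr" (filename fallback)
    #
    # Each resolved ID is then matched against the term.id values returned by
    # ev.get_all_terms_in_collection(project_name, collection_name).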
    def run_complete_test(
        self,
        project_name: str,
        repo_url: str = None,
        branch: str = None,
        repo_path: str = None,
        esgvoc_branch: str = None,
        universe_branch: str = None,
    ) -> bool:
        """
        Run complete CV testing pipeline

        Args:
            project_name: Name of the project to test
            repo_url: Custom repository URL (optional)
            branch: Custom branch (optional)
            repo_path: Path to repository for structure testing (optional - auto-detected if not provided)
            esgvoc_branch: ESGVoc library branch (for info only)
            universe_branch: Custom universe branch (optional)

        Returns:
            bool: True if all tests pass
        """
        console.print(f"[bold blue]🚀 Starting complete CV test for project: {project_name}[/bold blue]")

        success = True

        # Step 1: Configure esgvoc
        if not self.configure_for_testing(project_name, repo_url, branch, esgvoc_branch, universe_branch):
            return False

        # Step 2: Synchronize CVs
        if not self.synchronize_cvs():
            success = False

        # Step 2.5: Validate universe for warnings
        self._validate_universe_warnings()

        # Step 3: Determine repository path AFTER synchronization - use downloaded CV repository if not specified
        if repo_path is None:
            # Use the state service to get the actual project path directly
            try:
                current_state = service.get_state()
                if hasattr(current_state, "projects") and project_name in current_state.projects:
                    project_state = current_state.projects[project_name]
                    if hasattr(project_state, "local_path") and project_state.local_path:
                        repo_path = str(project_state.local_path)
                        console.print(f"[blue]Using CV repository from state service: {repo_path}[/blue]")
                    else:
                        console.print("[dim]Debug: Project state has no local_path[/dim]")
                else:
                    console.print(f"[dim]Debug: Project {project_name} not found in state service projects[/dim]")
                    console.print(
                        f"[dim]Debug: Available projects in state: {list(current_state.projects.keys()) if hasattr(current_state, 'projects') else 'No projects'}[/dim]"
                    )
            except Exception as e:
                console.print(f"[dim]Debug: Error accessing state service: {e}[/dim]")

            # Fallback: try to find the repository using the known default local path
            if repo_path is None:
                try:
                    from esgvoc.core.service.configuration.setting import ServiceSettings

                    if project_name in ServiceSettings.DEFAULT_PROJECT_CONFIGS:
                        default_local_path = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name]["local_path"]
                        config_manager = service.get_config_manager()

                        # Try different path constructions to find where the repository actually is
                        possible_paths = [
                            config_manager.data_config_dir / default_local_path,
                            config_manager.data_dir / self.test_config_name / default_local_path,
                            config_manager.data_dir / default_local_path,
                        ]

                        # Also check in other configuration directories
                        if config_manager.data_dir.exists():
                            for config_dir in config_manager.data_dir.iterdir():
                                if config_dir.is_dir():
                                    possible_repo_path = config_dir / default_local_path
                                    if possible_repo_path.exists():
                                        possible_paths.append(possible_repo_path)

                        for path in possible_paths:
                            if path and path.exists():
                                repo_path = str(path)
                                console.print(f"[blue]Found CV repository at: {repo_path}[/blue]")
                                break
                except Exception as e:
                    console.print(f"[dim]Debug: Error in fallback path detection: {e}[/dim]")

            # Final fallback
            if repo_path is None:
                repo_path = "."
                console.print("[yellow]⚠️ Could not determine CV repository path, using current directory[/yellow]")

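        # Resolution order recap: an explicit repo_path argument wins, then the
        # state service's local_path for the project, then the default local
        # paths under the config manager's data directories, and finally the
        # current directory as a last resort.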
        # Step 4: Test repository structure
        if not self.test_repository_structure(repo_path):
            success = False

        # Debug: Check which configuration is active before the API test
        current_active = service.get_config_manager().get_active_config_name()
        console.print(f"[dim]Debug: Active config before API test: {current_active}[/dim]")

        # Step 5: Test esgvoc API access
        if not self.test_esgvoc_api_access(project_name, repo_path):
            success = False

        # Summary
        if success:
            console.print(f"\n[bold green]🎉 All tests passed for project '{project_name}'![/bold green]")
        else:
            console.print(f"\n[bold red]❌ Some tests failed for project '{project_name}'[/bold red]")

        return success

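    # A sketch of driving the pipeline programmatically instead of via the CLI
    # below (the branch names here are placeholders, not real branches):
    #
    #     tester = CVTester()
    #     try:
    #         ok = tester.run_complete_test(
    #             "cmip6", branch="my-feature-branch", universe_branch="my-universe-branch"
    #         )
    #     finally:
    #         tester.cleanup()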
    def restore_original_configuration(self):
        """Restore the original esgvoc configuration"""
        try:
            if self.config_manager and self.original_config_name:
                # Switch back to original configuration
                console.print(f"[blue]Restoring original configuration: {self.original_config_name}[/blue]")
                self.config_manager.switch_config(self.original_config_name)

                # CRITICAL: Restore the original data_config_dir
                self.config_manager.data_config_dir = self.config_manager.data_dir / self.original_config_name
                self.config_manager.data_config_dir.mkdir(parents=True, exist_ok=True)
                console.print(f"[dim]Debug: Restored data_config_dir to: {self.config_manager.data_config_dir}[/dim]")

                # Reset service state
                service.current_state = service.get_state()

                # Remove temporary test configuration
                configs = self.config_manager.list_configs()
                if self.test_config_name in configs:
                    console.print(f"[blue]Removing temporary test configuration: {self.test_config_name}[/blue]")
                    self.config_manager.remove_config(self.test_config_name)

                console.print(f"[green]✅ Restored original configuration: {self.original_config_name}[/green]")
        except Exception as e:
            console.print(f"[yellow]⚠️ Error restoring original configuration: {e}[/yellow]")

    def cleanup(self):
        """Cleanup resources and restore original configuration"""
        self.restore_original_configuration()


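# Lifecycle shared by the commands below: configure_for_testing() activates a
# temporary test configuration, the tests run against it, and cleanup() (via
# restore_original_configuration()) switches back to the original configuration
# and removes the temporary one. The try/finally in main() is what guarantees
# this restore even when a test fails.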
def main():
    """Main CLI interface"""
    if len(sys.argv) < 2:
        print("Usage: cv_tester.py <command> [options]")
        print("\nCommands:")
        print("  list                  - List available projects")
        print("  configure <project>   - Configure esgvoc for testing")
        print("  test <project> [path] - Run complete test suite")
        print("  structure <path>      - Test repository structure only")
        print("  api <project> <path>  - Test esgvoc API access only")
        print("\nEnvironment variables:")
        print("  TEST_BRANCH           - Custom project branch to test")
        print("  REPO_URL              - Custom repository URL")
        print("  UNIVERSE_BRANCH       - Custom universe branch to test")
        print("  ESGVOC_LIBRARY_BRANCH - ESGVoc library branch (for info)")
        sys.exit(1)

    command = sys.argv[1]
    tester = CVTester()

    try:
        if command == "list":
            projects = tester.get_available_projects()
            console.print(f"[blue]Available projects ({len(projects)}):[/blue]")
            for project in projects:
                config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project]
                console.print(f"  [cyan]{project}[/cyan] - {config['github_repo']} (branch: {config['branch']})")

        elif command == "configure":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_url = os.environ.get("REPO_URL")
            branch = os.environ.get("TEST_BRANCH")
            esgvoc_branch = os.environ.get("ESGVOC_LIBRARY_BRANCH")
            # UNIVERSE_BRANCH is documented above, so read and forward it too
            universe_branch = os.environ.get("UNIVERSE_BRANCH")

            if tester.configure_for_testing(project_name, repo_url, branch, esgvoc_branch, universe_branch):
                if tester.synchronize_cvs():
                    console.print("[green]✅ Configuration complete[/green]")
                else:
                    sys.exit(1)
            else:
                sys.exit(1)

        elif command == "test":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_url = os.environ.get("REPO_URL")
            branch = os.environ.get("TEST_BRANCH")
            # None lets run_complete_test auto-detect the synchronized CV path
            repo_path = sys.argv[3] if len(sys.argv) > 3 else None
            esgvoc_branch = os.environ.get("ESGVOC_LIBRARY_BRANCH")
            universe_branch = os.environ.get("UNIVERSE_BRANCH")

            success = tester.run_complete_test(
                project_name, repo_url, branch, repo_path, esgvoc_branch, universe_branch
            )
            sys.exit(0 if success else 1)

        elif command == "structure":
            repo_path = sys.argv[2] if len(sys.argv) > 2 else "."
            success = tester.test_repository_structure(repo_path)
            sys.exit(0 if success else 1)

        elif command == "api":
            if len(sys.argv) < 3:
                console.print("[red]Error: Project name required[/red]")
                sys.exit(1)

            project_name = sys.argv[2]
            repo_path = sys.argv[3] if len(sys.argv) > 3 else "."
            success = tester.test_esgvoc_api_access(project_name, repo_path)
            sys.exit(0 if success else 1)

        else:
            console.print(f"[red]Error: Unknown command '{command}'[/red]")
            sys.exit(1)

    finally:
        tester.cleanup()


if __name__ == "__main__":
    main()
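
# Example invocations (a sketch; the branch names and repository URL are
# placeholders, while the commands and environment variables are the ones
# documented in main() above):
#
#     python cv_tester.py list
#     TEST_BRANCH=my-feature-branch python cv_tester.py test cmip6
#     REPO_URL=https://github.com/example/CMIP6_CVs UNIVERSE_BRANCH=my-universe-branch \
#         python cv_tester.py test cmip6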