esgvoc: esgvoc-1.0.1-py3-none-any.whl → esgvoc-1.1.1-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +0 -6
- esgvoc/api/data_descriptors/__init__.py +6 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +2 -2
- esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/vertical_label.py +2 -2
- esgvoc/api/project_specs.py +48 -130
- esgvoc/api/projects.py +104 -63
- esgvoc/apps/drs/generator.py +47 -42
- esgvoc/apps/drs/validator.py +22 -38
- esgvoc/apps/jsg/json_schema_generator.py +252 -136
- esgvoc/apps/jsg/templates/template.jinja +249 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/cv_tester.py +1368 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/config.py +671 -86
- esgvoc/cli/drs.py +39 -21
- esgvoc/cli/main.py +2 -0
- esgvoc/cli/test_cv.py +257 -0
- esgvoc/core/constants.py +10 -7
- esgvoc/core/data_handler.py +24 -22
- esgvoc/core/db/connection.py +7 -0
- esgvoc/core/db/project_ingestion.py +34 -9
- esgvoc/core/db/universe_ingestion.py +1 -2
- esgvoc/core/service/configuration/setting.py +192 -21
- esgvoc/core/service/data_merger.py +1 -1
- esgvoc/core/service/state.py +18 -2
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -1
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/RECORD +40 -29
- esgvoc/apps/jsg/cmip6_template.json +0 -74
- /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/cli/drs.py
CHANGED
|
@@ -16,12 +16,12 @@ app = typer.Typer()
|
|
|
16
16
|
console = Console()
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
|
|
20
19
|
# Predefined list of projects and DRS types
|
|
21
20
|
# projects = ["cmip5", "cmip6","cmip6plus", "cmip7"]
|
|
22
21
|
projects = ev.get_all_projects()
|
|
23
22
|
drs_types = ["filename", "directory", "dataset"]
|
|
24
23
|
|
|
24
|
+
|
|
25
25
|
def display(table):
|
|
26
26
|
"""
|
|
27
27
|
Function to display a rich table in the console.
|
|
@@ -34,15 +34,26 @@ def display(table):
|
|
|
34
34
|
|
|
35
35
|
@app.command()
|
|
36
36
|
def drsvalid(
|
|
37
|
-
drs_entries: Optional[List[str]] = typer.Argument(
|
|
38
|
-
|
|
37
|
+
drs_entries: Optional[List[str]] = typer.Argument(
|
|
38
|
+
None, help="List of DRS validation inputs in the form <project> <drstype> <string>"
|
|
39
|
+
),
|
|
40
|
+
file: Optional[typer.FileText] = typer.Option(
|
|
41
|
+
None,
|
|
42
|
+
"--file",
|
|
43
|
+
"-f",
|
|
44
|
+
help="File containing DRS validation inputs, one per line in the form <project> <drstype> <string>",
|
|
45
|
+
),
|
|
39
46
|
verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results"),
|
|
40
47
|
output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the DRS entries validation"),
|
|
41
|
-
rm_prefix: Optional[str] = typer.Option(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
rm_prefix: Optional[str] = typer.Option(
|
|
49
|
+
None, "-p", "--prefix", help="Remove given prefix from all checked directory"
|
|
50
|
+
),
|
|
51
|
+
pedantic: Optional[bool] = typer.Option(
|
|
52
|
+
False,
|
|
53
|
+
"-e",
|
|
54
|
+
"--enforce",
|
|
55
|
+
help="Enable pedantic mode, enforcing strict compliance, mean that warnings are now errors.",
|
|
56
|
+
),
|
|
46
57
|
) -> List[DrsValidationReport]:
|
|
47
58
|
"""
|
|
48
59
|
Validates DRS strings for a specific project and type.
|
|
@@ -68,14 +79,13 @@ def drsvalid(
|
|
|
68
79
|
if not sys.stdin.isatty(): # Check if input is being piped via stdin
|
|
69
80
|
entries.extend(el for line in sys.stdin for el in shlex.split(line))
|
|
70
81
|
|
|
71
|
-
|
|
72
82
|
if file:
|
|
73
83
|
entries.extend(el for line in file for el in line.strip().split(" "))
|
|
74
84
|
|
|
75
85
|
i = 0
|
|
76
86
|
while i < len(entries):
|
|
77
|
-
if entries[i] in [""," "]:
|
|
78
|
-
i+=1
|
|
87
|
+
if entries[i] in ["", " "]:
|
|
88
|
+
i += 1
|
|
79
89
|
continue
|
|
80
90
|
|
|
81
91
|
if entries[i] in projects:
|
|
@@ -102,9 +112,9 @@ def drsvalid(
|
|
|
102
112
|
report = validator.validate_file_name(string)
|
|
103
113
|
case "directory":
|
|
104
114
|
if rm_prefix:
|
|
105
|
-
prefix = rm_prefix+"/" if rm_prefix[-1]!="/" else ""
|
|
115
|
+
prefix = rm_prefix + "/" if rm_prefix[-1] != "/" else ""
|
|
106
116
|
else:
|
|
107
|
-
prefix=None
|
|
117
|
+
prefix = None
|
|
108
118
|
report = validator.validate_directory(string, prefix)
|
|
109
119
|
case "dataset":
|
|
110
120
|
report = validator.validate_dataset_id(string)
|
|
@@ -127,8 +137,8 @@ def drsvalid(
|
|
|
127
137
|
errors = "\n".join(["⚠️ " + str(error) for error in report.errors])
|
|
128
138
|
valid = "✅ Valid" if report else "❌ Invalid"
|
|
129
139
|
|
|
130
|
-
table.add_row("-"*4,"-"*4,"-"*4,"-"*4,"-"*4)
|
|
131
|
-
table.add_row(entry,proj_and_type, warnings, errors, valid)
|
|
140
|
+
table.add_row("-" * 4, "-" * 4, "-" * 4, "-" * 4, "-" * 4)
|
|
141
|
+
table.add_row(entry, proj_and_type, warnings, errors, valid)
|
|
132
142
|
|
|
133
143
|
console.print(table)
|
|
134
144
|
elif output:
|
|
@@ -137,7 +147,6 @@ def drsvalid(
|
|
|
137
147
|
f.write(str(report) + "\n")
|
|
138
148
|
console.print(f"DRS validation entries saved to [green]{output}[/green]")
|
|
139
149
|
|
|
140
|
-
|
|
141
150
|
else:
|
|
142
151
|
for report in reports:
|
|
143
152
|
console.print(str(report))
|
|
@@ -147,8 +156,15 @@ def drsvalid(
|
|
|
147
156
|
|
|
148
157
|
@app.command()
|
|
149
158
|
def drsgen(
|
|
150
|
-
drs_entries: Optional[List[str]] = typer.Argument(
|
|
151
|
-
|
|
159
|
+
drs_entries: Optional[List[str]] = typer.Argument(
|
|
160
|
+
None, help="List of inputs to generate DRS in the form <project> <drstype> <bag_of_terms>"
|
|
161
|
+
),
|
|
162
|
+
file: Optional[typer.FileText] = typer.Option(
|
|
163
|
+
None,
|
|
164
|
+
"--file",
|
|
165
|
+
"-f",
|
|
166
|
+
help="File containing DRS generation inputs, one per line in the form <project> <drstype> <bag_of_terms>",
|
|
167
|
+
),
|
|
152
168
|
verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed generation results"),
|
|
153
169
|
output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the generated DRS entries"),
|
|
154
170
|
) -> List[DrsGenerationReport]:
|
|
@@ -182,8 +198,8 @@ def drsgen(
|
|
|
182
198
|
|
|
183
199
|
i = 0
|
|
184
200
|
while i < len(entries):
|
|
185
|
-
if entries[i] in [""," "]:
|
|
186
|
-
i+=1
|
|
201
|
+
if entries[i] in ["", " "]:
|
|
202
|
+
i += 1
|
|
187
203
|
continue
|
|
188
204
|
if entries[i] in projects:
|
|
189
205
|
current_project = entries[i]
|
|
@@ -231,7 +247,7 @@ def drsgen(
|
|
|
231
247
|
table.add_row(entry, warnings, errors, result)
|
|
232
248
|
table.add_row("----", "----", "----", "----")
|
|
233
249
|
if table.columns[3].width is not None and len(result) > table.columns[3].width:
|
|
234
|
-
table.columns[3].width = len(result)+1
|
|
250
|
+
table.columns[3].width = len(result) + 1
|
|
235
251
|
console.print(table)
|
|
236
252
|
|
|
237
253
|
elif output:
|
|
@@ -245,5 +261,7 @@ def drsgen(
|
|
|
245
261
|
console.print(str(report))
|
|
246
262
|
|
|
247
263
|
return generated_reports
|
|
264
|
+
|
|
265
|
+
|
|
248
266
|
if __name__ == "__main__":
|
|
249
267
|
app()
|
esgvoc/cli/main.py
CHANGED
|
@@ -6,6 +6,7 @@ from esgvoc.cli.find import app as find_app
|
|
|
6
6
|
from esgvoc.cli.get import app as get_app
|
|
7
7
|
from esgvoc.cli.install import app as install_app
|
|
8
8
|
from esgvoc.cli.status import app as status_app
|
|
9
|
+
from esgvoc.cli.test_cv import app as test_cv_app
|
|
9
10
|
from esgvoc.cli.valid import app as valid_app
|
|
10
11
|
|
|
11
12
|
app = typer.Typer()
|
|
@@ -17,6 +18,7 @@ app.add_typer(valid_app)
|
|
|
17
18
|
app.add_typer(install_app)
|
|
18
19
|
app.add_typer(drs_app)
|
|
19
20
|
app.add_typer(config_app, name="config")
|
|
21
|
+
app.add_typer(test_cv_app, name="test")
|
|
20
22
|
app.add_typer(find_app)
|
|
21
23
|
|
|
22
24
|
|
esgvoc/cli/test_cv.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test CV CLI commands
|
|
3
|
+
|
|
4
|
+
Provides commands for testing project CVs and Universe CVs integrated with esgvoc CLI.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
from esgvoc.apps.test_cv.cv_tester import CVTester
|
|
14
|
+
from esgvoc.core.service.configuration.setting import ServiceSettings
|
|
15
|
+
|
|
16
|
+
app = typer.Typer()
|
|
17
|
+
console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@app.command()
|
|
21
|
+
def list_projects():
|
|
22
|
+
"""List all available CV projects that can be tested."""
|
|
23
|
+
tester = CVTester()
|
|
24
|
+
projects = tester.get_available_projects()
|
|
25
|
+
|
|
26
|
+
table = Table(title="Available CV Projects for Testing")
|
|
27
|
+
table.add_column("Project Name", style="cyan")
|
|
28
|
+
table.add_column("Repository", style="green")
|
|
29
|
+
table.add_column("Default Branch", style="yellow")
|
|
30
|
+
table.add_column("Local Path", style="blue")
|
|
31
|
+
|
|
32
|
+
for project_name in projects:
|
|
33
|
+
config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name]
|
|
34
|
+
table.add_row(project_name, config["github_repo"], config["branch"], config["local_path"])
|
|
35
|
+
|
|
36
|
+
console.print(table)
|
|
37
|
+
console.print(f"\n[blue]Total: {len(projects)} projects available for testing[/blue]")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.command()
|
|
41
|
+
def configure(
|
|
42
|
+
project: str = typer.Argument(..., help="Project name to configure for testing"),
|
|
43
|
+
repo_url: Optional[str] = typer.Option(None, "--repo", "-r", help="Custom repository URL"),
|
|
44
|
+
branch: Optional[str] = typer.Option(None, "--branch", "-b", help="Custom branch to test"),
|
|
45
|
+
universe_branch: Optional[str] = typer.Option(None, "--universe-branch", "-u", help="Custom universe branch"),
|
|
46
|
+
sync: bool = typer.Option(True, "--sync/--no-sync", help="Synchronize CVs after configuration"),
|
|
47
|
+
):
|
|
48
|
+
"""
|
|
49
|
+
Configure esgvoc with a specific project for testing.
|
|
50
|
+
|
|
51
|
+
Examples:
|
|
52
|
+
esgvoc test configure obs4mip
|
|
53
|
+
esgvoc test configure cmip6 --branch my-test-branch
|
|
54
|
+
esgvoc test configure cmip6 --universe-branch my-universe-branch
|
|
55
|
+
esgvoc test configure custom --repo https://github.com/me/my-cvs --branch main --universe-branch dev
|
|
56
|
+
"""
|
|
57
|
+
tester = CVTester()
|
|
58
|
+
|
|
59
|
+
try:
|
|
60
|
+
# Configure
|
|
61
|
+
if not tester.configure_for_testing(project, repo_url, branch, None, universe_branch):
|
|
62
|
+
raise typer.Exit(1)
|
|
63
|
+
|
|
64
|
+
# Optionally synchronize
|
|
65
|
+
if sync:
|
|
66
|
+
if not tester.synchronize_cvs():
|
|
67
|
+
raise typer.Exit(1)
|
|
68
|
+
|
|
69
|
+
console.print(f"[green]✅ Successfully configured project '{project}' for testing[/green]")
|
|
70
|
+
if not sync:
|
|
71
|
+
console.print("[yellow]Note: CVs not synchronized. Run 'esgvoc test sync' to download.[/yellow]")
|
|
72
|
+
|
|
73
|
+
except Exception as e:
|
|
74
|
+
console.print(f"[red]❌ Configuration failed: {e}[/red]")
|
|
75
|
+
raise typer.Exit(1)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.command()
|
|
79
|
+
def sync():
|
|
80
|
+
"""Synchronize/download CVs for the currently configured project."""
|
|
81
|
+
tester = CVTester()
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
if not tester.synchronize_cvs():
|
|
85
|
+
raise typer.Exit(1)
|
|
86
|
+
console.print("[green]✅ CVs synchronized successfully[/green]")
|
|
87
|
+
except Exception as e:
|
|
88
|
+
console.print(f"[red]❌ Synchronization failed: {e}[/red]")
|
|
89
|
+
raise typer.Exit(1)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@app.command()
|
|
93
|
+
def structure(
|
|
94
|
+
path: str = typer.Argument(".", help="Path to CV repository to validate"),
|
|
95
|
+
):
|
|
96
|
+
"""
|
|
97
|
+
Test CV repository structure and file format compliance.
|
|
98
|
+
|
|
99
|
+
Validates:
|
|
100
|
+
- Collection directory structure
|
|
101
|
+
- JSONLD context files
|
|
102
|
+
- Element JSON files
|
|
103
|
+
- project_specs.json references
|
|
104
|
+
|
|
105
|
+
Examples:
|
|
106
|
+
esgvoc test structure .
|
|
107
|
+
esgvoc test structure /path/to/cv/repo
|
|
108
|
+
"""
|
|
109
|
+
tester = CVTester()
|
|
110
|
+
|
|
111
|
+
try:
|
|
112
|
+
if not tester.test_repository_structure(path):
|
|
113
|
+
raise typer.Exit(1)
|
|
114
|
+
console.print("[green]✅ Repository structure validation passed[/green]")
|
|
115
|
+
except Exception as e:
|
|
116
|
+
console.print(f"[red]❌ Structure validation failed: {e}[/red]")
|
|
117
|
+
raise typer.Exit(1)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@app.command()
|
|
121
|
+
def api(
|
|
122
|
+
project: str = typer.Argument(..., help="Project name to test API access for"),
|
|
123
|
+
path: str = typer.Argument(".", help="Path to CV repository"),
|
|
124
|
+
debug_terms: bool = typer.Option(True, "--debug-terms/--no-debug-terms", help="Show detailed debugging info for missing terms"),
|
|
125
|
+
):
|
|
126
|
+
"""
|
|
127
|
+
Test esgvoc API access for all repository collections and elements.
|
|
128
|
+
|
|
129
|
+
Validates:
|
|
130
|
+
- Project is accessible via esgvoc API
|
|
131
|
+
- All repository collections are queryable
|
|
132
|
+
- All repository elements are accessible
|
|
133
|
+
- API functions work correctly
|
|
134
|
+
|
|
135
|
+
Examples:
|
|
136
|
+
esgvoc test api obs4mip .
|
|
137
|
+
esgvoc test api cmip6 /path/to/cmip6/repo
|
|
138
|
+
"""
|
|
139
|
+
tester = CVTester(debug_missing_terms=debug_terms)
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
if not tester.test_esgvoc_api_access(project, path):
|
|
143
|
+
raise typer.Exit(1)
|
|
144
|
+
console.print("[green]✅ ESGVoc API access validation passed[/green]")
|
|
145
|
+
except Exception as e:
|
|
146
|
+
console.print(f"[red]❌ API validation failed: {e}[/red]")
|
|
147
|
+
raise typer.Exit(1)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@app.command()
|
|
151
|
+
def run(
|
|
152
|
+
project: str = typer.Argument(..., help="Project name to test"),
|
|
153
|
+
path: Optional[str] = typer.Argument(None, help="Path to CV repository (auto-detected if not provided)"),
|
|
154
|
+
repo_url: Optional[str] = typer.Option(None, "--repo", "-r", help="Custom repository URL"),
|
|
155
|
+
branch: Optional[str] = typer.Option(None, "--branch", "-b", help="Custom branch to test"),
|
|
156
|
+
universe_branch: Optional[str] = typer.Option(None, "--universe-branch", "-u", help="Custom universe branch"),
|
|
157
|
+
debug_terms: bool = typer.Option(True, "--debug-terms/--no-debug-terms", help="Show detailed debugging info for missing terms"),
|
|
158
|
+
):
|
|
159
|
+
"""
|
|
160
|
+
Run complete CV test suite: configure, sync, structure, and API tests.
|
|
161
|
+
|
|
162
|
+
This is the comprehensive test that runs all validation steps:
|
|
163
|
+
1. Configure esgvoc with the specified project
|
|
164
|
+
2. Synchronize/download CVs
|
|
165
|
+
3. Validate repository structure
|
|
166
|
+
4. Test esgvoc API access
|
|
167
|
+
|
|
168
|
+
Examples:
|
|
169
|
+
esgvoc test run obs4mip
|
|
170
|
+
esgvoc test run cmip6 --branch my-test-branch
|
|
171
|
+
esgvoc test run cmip6 --universe-branch my-universe-branch
|
|
172
|
+
esgvoc test run cmip6 /path/to/custom/repo --branch my-test-branch --universe-branch dev
|
|
173
|
+
esgvoc test run custom --repo https://github.com/me/cvs --branch main --universe-branch main
|
|
174
|
+
"""
|
|
175
|
+
tester = CVTester(debug_missing_terms=debug_terms)
|
|
176
|
+
|
|
177
|
+
try:
|
|
178
|
+
success = tester.run_complete_test(project, repo_url, branch, path, None, universe_branch)
|
|
179
|
+
if success:
|
|
180
|
+
console.print(f"[bold green]🎉 All tests passed for project '{project}'![/bold green]")
|
|
181
|
+
else:
|
|
182
|
+
# The detailed failure information is already printed by cv_tester
|
|
183
|
+
raise typer.Exit(1)
|
|
184
|
+
except Exception as e:
|
|
185
|
+
console.print(f"[red]❌ Test suite failed: {e}[/red]")
|
|
186
|
+
raise typer.Exit(1)
|
|
187
|
+
finally:
|
|
188
|
+
tester.cleanup()
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@app.command()
|
|
192
|
+
def env(
|
|
193
|
+
command: str = typer.Argument(..., help="Environment mode command: 'configure' or 'test'"),
|
|
194
|
+
project: Optional[str] = typer.Option(None, "--project", "-p", help="Project name (auto-detected if not provided)"),
|
|
195
|
+
repo_url: Optional[str] = typer.Option(
|
|
196
|
+
None, "--repo-url", help="Repository URL (from REPO_URL env var if not provided)"
|
|
197
|
+
),
|
|
198
|
+
branch: Optional[str] = typer.Option(None, "--branch", help="Branch (from TEST_BRANCH env var if not provided)"),
|
|
199
|
+
universe_branch: Optional[str] = typer.Option(None, "--universe-branch", help="Universe branch (from UNIVERSE_BRANCH env var if not provided)"),
|
|
200
|
+
debug_terms: bool = typer.Option(True, "--debug-terms/--no-debug-terms", help="Show detailed debugging info for missing terms"),
|
|
201
|
+
):
|
|
202
|
+
"""
|
|
203
|
+
Environment variable mode for CI/CD integration and automated testing.
|
|
204
|
+
|
|
205
|
+
Reads configuration from environment variables:
|
|
206
|
+
- REPO_URL: Repository URL to test
|
|
207
|
+
- TEST_BRANCH: Branch to test
|
|
208
|
+
- PROJECT_NAME: Project name (auto-detected if not set)
|
|
209
|
+
- UNIVERSE_BRANCH: Universe branch to test (optional)
|
|
210
|
+
- ESGVOC_LIBRARY_BRANCH: ESGVoc library branch (informational)
|
|
211
|
+
|
|
212
|
+
Examples:
|
|
213
|
+
# Set environment and run
|
|
214
|
+
export REPO_URL=https://github.com/me/obs4MIPs_CVs
|
|
215
|
+
export TEST_BRANCH=test-branch
|
|
216
|
+
export UNIVERSE_BRANCH=my-universe-branch
|
|
217
|
+
esgvoc test env configure
|
|
218
|
+
esgvoc test env test
|
|
219
|
+
|
|
220
|
+
# Or use options
|
|
221
|
+
esgvoc test env configure --project obs4mip --repo-url https://github.com/me/repo --branch main --universe-branch dev
|
|
222
|
+
"""
|
|
223
|
+
import os
|
|
224
|
+
|
|
225
|
+
# Get config from environment or options
|
|
226
|
+
final_repo_url = repo_url or os.environ.get("REPO_URL")
|
|
227
|
+
final_branch = branch or os.environ.get("TEST_BRANCH")
|
|
228
|
+
final_universe_branch = universe_branch or os.environ.get("UNIVERSE_BRANCH")
|
|
229
|
+
final_project = project or os.environ.get("PROJECT_NAME")
|
|
230
|
+
|
|
231
|
+
# Auto-detect project if not provided
|
|
232
|
+
if not final_project:
|
|
233
|
+
from esgvoc.apps.test_cv.cv_tester import detect_project_name
|
|
234
|
+
|
|
235
|
+
final_project = detect_project_name()
|
|
236
|
+
|
|
237
|
+
if command == "configure":
|
|
238
|
+
if not final_repo_url or not final_branch:
|
|
239
|
+
console.print("[red]❌ REPO_URL and TEST_BRANCH are required for env configure[/red]")
|
|
240
|
+
console.print("Set environment variables or use --repo-url and --branch options")
|
|
241
|
+
raise typer.Exit(1)
|
|
242
|
+
|
|
243
|
+
# Use configure command
|
|
244
|
+
configure(final_project, final_repo_url, final_branch, final_universe_branch, sync=True)
|
|
245
|
+
|
|
246
|
+
elif command == "test":
|
|
247
|
+
# Use run command
|
|
248
|
+
run(final_project, None, final_repo_url, final_branch, final_universe_branch, debug_terms)
|
|
249
|
+
|
|
250
|
+
else:
|
|
251
|
+
console.print(f"[red]❌ Invalid env command '{command}'. Use 'configure' or 'test'[/red]")
|
|
252
|
+
raise typer.Exit(1)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
if __name__ == "__main__":
|
|
256
|
+
app()
|
|
257
|
+
|
esgvoc/core/constants.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
DIRNAME_AND_FILENAME_SEPARATOR = "_"
|
|
2
|
-
PROJECT_SPECS_FILENAME = "project_specs.
|
|
2
|
+
PROJECT_SPECS_FILENAME = "project_specs.yaml"
|
|
3
|
+
DRS_SPECS_FILENAME = "drs_specs.yaml"
|
|
4
|
+
CATALOG_SPECS_FILENAME = "catalog_specs.yaml"
|
|
5
|
+
ATTRIBUTES_SPECS_FILENAME = "attr_specs.yaml"
|
|
3
6
|
PROJECT_ID_JSON_KEY = "project_id"
|
|
4
7
|
CONTEXT_FILENAME = "000_context.jsonld"
|
|
5
8
|
CONTEXT_JSON_KEY = "@context"
|
|
6
|
-
TERM_ID_JSON_KEY =
|
|
7
|
-
COMPOSITE_PARTS_JSON_KEY =
|
|
8
|
-
COMPOSITE_SEPARATOR_JSON_KEY =
|
|
9
|
-
PATTERN_JSON_KEY =
|
|
10
|
-
TERM_TYPE_JSON_KEY =
|
|
11
|
-
DRS_SPECS_JSON_KEY =
|
|
9
|
+
TERM_ID_JSON_KEY = "id"
|
|
10
|
+
COMPOSITE_PARTS_JSON_KEY = "parts"
|
|
11
|
+
COMPOSITE_SEPARATOR_JSON_KEY = "separator"
|
|
12
|
+
PATTERN_JSON_KEY = "regex"
|
|
13
|
+
TERM_TYPE_JSON_KEY = "type"
|
|
14
|
+
DRS_SPECS_JSON_KEY = "drs_name"
|
|
12
15
|
SQLITE_FIRST_PK = 1
|
|
13
16
|
DATA_DESCRIPTOR_JSON_KEY = "@base"
|
esgvoc/core/data_handler.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
import os
|
|
3
2
|
import json
|
|
4
3
|
import logging
|
|
@@ -9,11 +8,13 @@ from pyld import jsonld
|
|
|
9
8
|
from pydantic import BaseModel, model_validator, ConfigDict
|
|
10
9
|
|
|
11
10
|
from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
|
|
11
|
+
|
|
12
12
|
# Configure logging
|
|
13
13
|
_LOGGER = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
15
|
mapping = DATA_DESCRIPTOR_CLASS_MAPPING
|
|
16
16
|
|
|
17
|
+
|
|
17
18
|
def unified_document_loader(uri: str) -> Dict:
|
|
18
19
|
"""Load a document from a local file or a remote URI."""
|
|
19
20
|
if uri.startswith(("http://", "https://")):
|
|
@@ -27,6 +28,7 @@ def unified_document_loader(uri: str) -> Dict:
|
|
|
27
28
|
with open(uri, "r") as f:
|
|
28
29
|
return json.load(f)
|
|
29
30
|
|
|
31
|
+
|
|
30
32
|
class JsonLdResource(BaseModel):
|
|
31
33
|
uri: str
|
|
32
34
|
local_path: Optional[str] = None
|
|
@@ -40,13 +42,15 @@ class JsonLdResource(BaseModel):
|
|
|
40
42
|
local_path = values.get("local_path")
|
|
41
43
|
if local_path:
|
|
42
44
|
values["local_path"] = os.path.abspath(local_path) + "/"
|
|
43
|
-
jsonld.set_document_loader(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
jsonld.set_document_loader(
|
|
46
|
+
lambda uri, options: {
|
|
47
|
+
"contextUrl": None, # No special context URL
|
|
48
|
+
"documentUrl": uri, # The document's actual URL
|
|
49
|
+
"document": unified_document_loader(uri), # The parsed JSON-LD document
|
|
50
|
+
}
|
|
51
|
+
)
|
|
48
52
|
return values
|
|
49
|
-
|
|
53
|
+
|
|
50
54
|
@cached_property
|
|
51
55
|
def json_dict(self) -> Dict:
|
|
52
56
|
"""Fetch the original JSON data."""
|
|
@@ -58,12 +62,12 @@ class JsonLdResource(BaseModel):
|
|
|
58
62
|
"""Expand the JSON-LD data."""
|
|
59
63
|
_LOGGER.debug(f"Expanding JSON-LD data for {self.uri}")
|
|
60
64
|
return jsonld.expand(self.uri, options={"base": self.uri})
|
|
61
|
-
|
|
65
|
+
|
|
62
66
|
@cached_property
|
|
63
67
|
def context(self) -> Dict:
|
|
64
68
|
"""Fetch and return the JSON content of the '@context'."""
|
|
65
|
-
|
|
66
|
-
context_data =JsonLdResource(uri="/".join(self.uri.split("/")[:-1])+"/"+self.json_dict["@context"])
|
|
69
|
+
|
|
70
|
+
context_data = JsonLdResource(uri="/".join(self.uri.split("/")[:-1]) + "/" + self.json_dict["@context"])
|
|
67
71
|
# Works only in relative path declaration
|
|
68
72
|
|
|
69
73
|
context_value = context_data.json_dict
|
|
@@ -83,9 +87,7 @@ class JsonLdResource(BaseModel):
|
|
|
83
87
|
def normalized(self) -> str:
|
|
84
88
|
"""Normalize the JSON-LD data."""
|
|
85
89
|
_LOGGER.info(f"Normalizing JSON-LD data for {self.uri}")
|
|
86
|
-
return jsonld.normalize(
|
|
87
|
-
self.uri, options={"algorithm": "URDNA2015", "format": "application/n-quads"}
|
|
88
|
-
)
|
|
90
|
+
return jsonld.normalize(self.uri, options={"algorithm": "URDNA2015", "format": "application/n-quads"})
|
|
89
91
|
|
|
90
92
|
@cached_property
|
|
91
93
|
def python(self) -> Optional[Any]:
|
|
@@ -120,14 +122,14 @@ class JsonLdResource(BaseModel):
|
|
|
120
122
|
|
|
121
123
|
|
|
122
124
|
if __name__ == "__main__":
|
|
123
|
-
## For Universe
|
|
124
|
-
#online
|
|
125
|
+
## For Universe
|
|
126
|
+
# online
|
|
125
127
|
# d = Data(uri = "https://espri-mod.github.io/mip-cmor-tables/activity/cmip.json")
|
|
126
128
|
# print(d.info)
|
|
127
|
-
#offline
|
|
128
|
-
#print(Data(uri = ".cache/repos/mip-cmor-tables/activity/cmip.json").info)
|
|
129
|
-
## for Project
|
|
130
|
-
#d = Data(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/activity_id/cmip.json")
|
|
131
|
-
#print(d.info)
|
|
132
|
-
#offline
|
|
133
|
-
print(JsonLdResource(uri
|
|
129
|
+
# offline
|
|
130
|
+
# print(Data(uri = ".cache/repos/mip-cmor-tables/activity/cmip.json").info)
|
|
131
|
+
## for Project
|
|
132
|
+
# d = Data(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/activity_id/cmip.json")
|
|
133
|
+
# print(d.info)
|
|
134
|
+
# offline
|
|
135
|
+
print(JsonLdResource(uri=".cache/repos/CMIP6Plus_CVs/activity_id/cmip.json").info)
|
esgvoc/core/db/connection.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
import yaml
|
|
4
5
|
from sqlalchemy import Engine
|
|
5
6
|
from sqlmodel import Session, create_engine
|
|
6
7
|
|
|
@@ -31,3 +32,9 @@ class DBConnection:
|
|
|
31
32
|
|
|
32
33
|
def read_json_file(json_file_path: Path) -> dict:
|
|
33
34
|
return json.loads(json_file_path.read_text())
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def read_yaml_file(yaml_file_path: Path) -> dict:
|
|
38
|
+
with open(yaml_file_path, 'r') as file:
|
|
39
|
+
result = yaml.safe_load(file)
|
|
40
|
+
return result
|
|
@@ -8,7 +8,7 @@ import esgvoc.core.constants
|
|
|
8
8
|
import esgvoc.core.db.connection as db
|
|
9
9
|
import esgvoc.core.service as service
|
|
10
10
|
from esgvoc.core.data_handler import JsonLdResource
|
|
11
|
-
from esgvoc.core.db.connection import DBConnection, read_json_file
|
|
11
|
+
from esgvoc.core.db.connection import DBConnection, read_json_file, read_yaml_file
|
|
12
12
|
from esgvoc.core.db.models.mixins import TermKind
|
|
13
13
|
from esgvoc.core.db.models.project import PCollection, Project, PTerm
|
|
14
14
|
from esgvoc.core.exceptions import EsgvocDbError
|
|
@@ -88,7 +88,10 @@ def ingest_collection(collection_dir_path: Path, project: Project, project_db_se
|
|
|
88
88
|
term_kind_collection = term_kind
|
|
89
89
|
|
|
90
90
|
except Exception as e:
|
|
91
|
-
_LOGGER.warning(
|
|
91
|
+
_LOGGER.warning(
|
|
92
|
+
f"Unable to read term file {term_file_path} in collection '{collection_id}' "
|
|
93
|
+
+ f"of project '{project.id}'. Skip.\n{str(e)}"
|
|
94
|
+
)
|
|
92
95
|
continue
|
|
93
96
|
try:
|
|
94
97
|
term = PTerm(
|
|
@@ -99,14 +102,28 @@ def ingest_collection(collection_dir_path: Path, project: Project, project_db_se
|
|
|
99
102
|
)
|
|
100
103
|
project_db_session.add(term)
|
|
101
104
|
except Exception as e:
|
|
105
|
+
# Enhanced error reporting for term validation failures
|
|
102
106
|
_LOGGER.error(
|
|
103
|
-
f"
|
|
104
|
-
+ f"
|
|
107
|
+
f"Failed to create term '{term_id}' in collection '{collection_id}' "
|
|
108
|
+
+ f"of project '{project.id}' from file '{term_file_path}': {str(e)}"
|
|
105
109
|
)
|
|
106
110
|
continue
|
|
107
111
|
if term_kind_collection:
|
|
108
112
|
collection.term_kind = term_kind_collection
|
|
109
|
-
|
|
113
|
+
else:
|
|
114
|
+
# If we couldn't determine a term kind, use PLAIN as default and log warning
|
|
115
|
+
_LOGGER.warning(
|
|
116
|
+
f"No term kind determined for collection '{collection_id}' in project '{project.id}'. "
|
|
117
|
+
+ "Using PLAIN as default. This might indicate empty collection or processing errors."
|
|
118
|
+
)
|
|
119
|
+
collection.term_kind = TermKind.PLAIN
|
|
120
|
+
|
|
121
|
+
try:
|
|
122
|
+
project_db_session.add(collection)
|
|
123
|
+
except Exception as e:
|
|
124
|
+
error_context = f"Failed to add collection '{collection_id}' to project '{project.id}'"
|
|
125
|
+
_LOGGER.error(f"{error_context}: {str(e)}")
|
|
126
|
+
raise EsgvocDbError(f"{error_context}: {str(e)}") from e
|
|
110
127
|
|
|
111
128
|
|
|
112
129
|
def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash: str):
|
|
@@ -119,15 +136,23 @@ def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash:
|
|
|
119
136
|
|
|
120
137
|
with project_connection.create_session() as project_db_session:
|
|
121
138
|
project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
|
|
139
|
+
drs_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.DRS_SPECS_FILENAME)
|
|
140
|
+
catalog_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.CATALOG_SPECS_FILENAME)
|
|
122
141
|
try:
|
|
123
|
-
|
|
124
|
-
project_id =
|
|
142
|
+
raw_project_specs = read_yaml_file(project_specs_file_path)
|
|
143
|
+
project_id = raw_project_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
|
|
144
|
+
raw_drs_specs = read_yaml_file(drs_specs_file_path)
|
|
145
|
+
project_specs = raw_project_specs
|
|
146
|
+
project_specs['drs_specs'] = raw_drs_specs
|
|
147
|
+
if catalog_specs_file_path.exists():
|
|
148
|
+
raw_catalog_specs = read_yaml_file(catalog_specs_file_path)
|
|
149
|
+
project_specs['catalog_specs'] = raw_catalog_specs
|
|
125
150
|
except Exception as e:
|
|
126
|
-
msg = f"unable to read
|
|
151
|
+
msg = f"unable to read specs files in {project_dir_path}"
|
|
127
152
|
_LOGGER.fatal(msg)
|
|
128
153
|
raise EsgvocDbError(msg) from e
|
|
129
154
|
|
|
130
|
-
project = Project(id=project_id, specs=
|
|
155
|
+
project = Project(id=project_id, specs=project_specs, git_hash=git_hash)
|
|
131
156
|
project_db_session.add(project)
|
|
132
157
|
|
|
133
158
|
for collection_dir_path in project_dir_path.iterdir():
|
|
@@ -15,6 +15,7 @@ from esgvoc.core.exceptions import EsgvocDbError
|
|
|
15
15
|
from esgvoc.core.service.data_merger import DataMerger
|
|
16
16
|
|
|
17
17
|
_LOGGER = logging.getLogger(__name__)
|
|
18
|
+
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
def infer_term_kind(json_specs: dict) -> TermKind:
|
|
@@ -81,7 +82,6 @@ def ingest_metadata_universe(connection, git_hash):
|
|
|
81
82
|
|
|
82
83
|
def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
|
|
83
84
|
data_descriptor_id = data_descriptor_path.name
|
|
84
|
-
|
|
85
85
|
context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
|
|
86
86
|
try:
|
|
87
87
|
context = read_json_file(context_file_path)
|
|
@@ -108,7 +108,6 @@ def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnecti
|
|
|
108
108
|
json_specs = DataMerger(
|
|
109
109
|
data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
|
|
110
110
|
).merge_linked_json()[-1]
|
|
111
|
-
|
|
112
111
|
term_kind = infer_term_kind(json_specs)
|
|
113
112
|
term_id = json_specs["id"]
|
|
114
113
|
|