esgvoc 1.0.1__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (41)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +0 -6
  3. esgvoc/api/data_descriptors/__init__.py +6 -0
  4. esgvoc/api/data_descriptors/archive.py +5 -0
  5. esgvoc/api/data_descriptors/citation_url.py +5 -0
  6. esgvoc/api/data_descriptors/experiment.py +2 -2
  7. esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
  8. esgvoc/api/data_descriptors/regex.py +5 -0
  9. esgvoc/api/data_descriptors/vertical_label.py +2 -2
  10. esgvoc/api/project_specs.py +48 -130
  11. esgvoc/api/projects.py +104 -63
  12. esgvoc/apps/drs/generator.py +47 -42
  13. esgvoc/apps/drs/validator.py +22 -38
  14. esgvoc/apps/jsg/json_schema_generator.py +252 -136
  15. esgvoc/apps/jsg/templates/template.jinja +249 -0
  16. esgvoc/apps/test_cv/README.md +214 -0
  17. esgvoc/apps/test_cv/cv_tester.py +1368 -0
  18. esgvoc/apps/test_cv/example_usage.py +216 -0
  19. esgvoc/apps/vr/__init__.py +12 -0
  20. esgvoc/apps/vr/build_variable_registry.py +71 -0
  21. esgvoc/apps/vr/example_usage.py +60 -0
  22. esgvoc/apps/vr/vr_app.py +333 -0
  23. esgvoc/cli/config.py +671 -86
  24. esgvoc/cli/drs.py +39 -21
  25. esgvoc/cli/main.py +2 -0
  26. esgvoc/cli/test_cv.py +257 -0
  27. esgvoc/core/constants.py +10 -7
  28. esgvoc/core/data_handler.py +24 -22
  29. esgvoc/core/db/connection.py +7 -0
  30. esgvoc/core/db/project_ingestion.py +34 -9
  31. esgvoc/core/db/universe_ingestion.py +1 -2
  32. esgvoc/core/service/configuration/setting.py +192 -21
  33. esgvoc/core/service/data_merger.py +1 -1
  34. esgvoc/core/service/state.py +18 -2
  35. {esgvoc-1.0.1.dist-info → esgvoc-1.1.2.dist-info}/METADATA +3 -1
  36. {esgvoc-1.0.1.dist-info → esgvoc-1.1.2.dist-info}/RECORD +40 -29
  37. esgvoc/apps/jsg/cmip6_template.json +0 -74
  38. /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
  39. {esgvoc-1.0.1.dist-info → esgvoc-1.1.2.dist-info}/WHEEL +0 -0
  40. {esgvoc-1.0.1.dist-info → esgvoc-1.1.2.dist-info}/entry_points.txt +0 -0
  41. {esgvoc-1.0.1.dist-info → esgvoc-1.1.2.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/cli/drs.py CHANGED
@@ -16,12 +16,12 @@ app = typer.Typer()
16
16
  console = Console()
17
17
 
18
18
 
19
-
20
19
  # Predefined list of projects and DRS types
21
20
  # projects = ["cmip5", "cmip6","cmip6plus", "cmip7"]
22
21
  projects = ev.get_all_projects()
23
22
  drs_types = ["filename", "directory", "dataset"]
24
23
 
24
+
25
25
  def display(table):
26
26
  """
27
27
  Function to display a rich table in the console.
@@ -34,15 +34,26 @@ def display(table):
34
34
 
35
35
  @app.command()
36
36
  def drsvalid(
37
- drs_entries: Optional[List[str]] = typer.Argument(None, help="List of DRS validation inputs in the form <project> <drstype> <string>"),
38
- file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS validation inputs, one per line in the form <project> <drstype> <string>"),
37
+ drs_entries: Optional[List[str]] = typer.Argument(
38
+ None, help="List of DRS validation inputs in the form <project> <drstype> <string>"
39
+ ),
40
+ file: Optional[typer.FileText] = typer.Option(
41
+ None,
42
+ "--file",
43
+ "-f",
44
+ help="File containing DRS validation inputs, one per line in the form <project> <drstype> <string>",
45
+ ),
39
46
  verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results"),
40
47
  output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the DRS entries validation"),
41
- rm_prefix: Optional[str] = typer.Option(None,"-p","--prefix", help="Remove given prefix from all checked directory"),
42
- pedantic: Optional[bool] = typer.Option(False,"-e","--enforce", help="Enable pedantic mode, enforcing strict compliance, mean that warnings are now errors.")
43
-
44
-
45
-
48
+ rm_prefix: Optional[str] = typer.Option(
49
+ None, "-p", "--prefix", help="Remove given prefix from all checked directory"
50
+ ),
51
+ pedantic: Optional[bool] = typer.Option(
52
+ False,
53
+ "-e",
54
+ "--enforce",
55
+ help="Enable pedantic mode, enforcing strict compliance, mean that warnings are now errors.",
56
+ ),
46
57
  ) -> List[DrsValidationReport]:
47
58
  """
48
59
  Validates DRS strings for a specific project and type.
@@ -68,14 +79,13 @@ def drsvalid(
68
79
  if not sys.stdin.isatty(): # Check if input is being piped via stdin
69
80
  entries.extend(el for line in sys.stdin for el in shlex.split(line))
70
81
 
71
-
72
82
  if file:
73
83
  entries.extend(el for line in file for el in line.strip().split(" "))
74
84
 
75
85
  i = 0
76
86
  while i < len(entries):
77
- if entries[i] in [""," "]:
78
- i+=1
87
+ if entries[i] in ["", " "]:
88
+ i += 1
79
89
  continue
80
90
 
81
91
  if entries[i] in projects:
@@ -102,9 +112,9 @@ def drsvalid(
102
112
  report = validator.validate_file_name(string)
103
113
  case "directory":
104
114
  if rm_prefix:
105
- prefix = rm_prefix+"/" if rm_prefix[-1]!="/" else ""
115
+ prefix = rm_prefix + "/" if rm_prefix[-1] != "/" else ""
106
116
  else:
107
- prefix=None
117
+ prefix = None
108
118
  report = validator.validate_directory(string, prefix)
109
119
  case "dataset":
110
120
  report = validator.validate_dataset_id(string)
@@ -127,8 +137,8 @@ def drsvalid(
127
137
  errors = "\n".join(["⚠️ " + str(error) for error in report.errors])
128
138
  valid = "✅ Valid" if report else "❌ Invalid"
129
139
 
130
- table.add_row("-"*4,"-"*4,"-"*4,"-"*4,"-"*4)
131
- table.add_row(entry,proj_and_type, warnings, errors, valid)
140
+ table.add_row("-" * 4, "-" * 4, "-" * 4, "-" * 4, "-" * 4)
141
+ table.add_row(entry, proj_and_type, warnings, errors, valid)
132
142
 
133
143
  console.print(table)
134
144
  elif output:
@@ -137,7 +147,6 @@ def drsvalid(
137
147
  f.write(str(report) + "\n")
138
148
  console.print(f"DRS validation entries saved to [green]{output}[/green]")
139
149
 
140
-
141
150
  else:
142
151
  for report in reports:
143
152
  console.print(str(report))
@@ -147,8 +156,15 @@ def drsvalid(
147
156
 
148
157
  @app.command()
149
158
  def drsgen(
150
- drs_entries: Optional[List[str]] = typer.Argument(None, help="List of inputs to generate DRS in the form <project> <drstype> <bag_of_terms>"),
151
- file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS generation inputs, one per line in the form <project> <drstype> <bag_of_terms>"),
159
+ drs_entries: Optional[List[str]] = typer.Argument(
160
+ None, help="List of inputs to generate DRS in the form <project> <drstype> <bag_of_terms>"
161
+ ),
162
+ file: Optional[typer.FileText] = typer.Option(
163
+ None,
164
+ "--file",
165
+ "-f",
166
+ help="File containing DRS generation inputs, one per line in the form <project> <drstype> <bag_of_terms>",
167
+ ),
152
168
  verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed generation results"),
153
169
  output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the generated DRS entries"),
154
170
  ) -> List[DrsGenerationReport]:
@@ -182,8 +198,8 @@ def drsgen(
182
198
 
183
199
  i = 0
184
200
  while i < len(entries):
185
- if entries[i] in [""," "]:
186
- i+=1
201
+ if entries[i] in ["", " "]:
202
+ i += 1
187
203
  continue
188
204
  if entries[i] in projects:
189
205
  current_project = entries[i]
@@ -231,7 +247,7 @@ def drsgen(
231
247
  table.add_row(entry, warnings, errors, result)
232
248
  table.add_row("----", "----", "----", "----")
233
249
  if table.columns[3].width is not None and len(result) > table.columns[3].width:
234
- table.columns[3].width = len(result)+1
250
+ table.columns[3].width = len(result) + 1
235
251
  console.print(table)
236
252
 
237
253
  elif output:
@@ -245,5 +261,7 @@ def drsgen(
245
261
  console.print(str(report))
246
262
 
247
263
  return generated_reports
264
+
265
+
248
266
  if __name__ == "__main__":
249
267
  app()
esgvoc/cli/main.py CHANGED
@@ -6,6 +6,7 @@ from esgvoc.cli.find import app as find_app
6
6
  from esgvoc.cli.get import app as get_app
7
7
  from esgvoc.cli.install import app as install_app
8
8
  from esgvoc.cli.status import app as status_app
9
+ from esgvoc.cli.test_cv import app as test_cv_app
9
10
  from esgvoc.cli.valid import app as valid_app
10
11
 
11
12
  app = typer.Typer()
@@ -17,6 +18,7 @@ app.add_typer(valid_app)
17
18
  app.add_typer(install_app)
18
19
  app.add_typer(drs_app)
19
20
  app.add_typer(config_app, name="config")
21
+ app.add_typer(test_cv_app, name="test")
20
22
  app.add_typer(find_app)
21
23
 
22
24
 
esgvoc/cli/test_cv.py ADDED
@@ -0,0 +1,257 @@
1
+ """
2
+ Test CV CLI commands
3
+
4
+ Provides commands for testing project CVs and Universe CVs integrated with esgvoc CLI.
5
+ """
6
+
7
+ from typing import Optional
8
+
9
+ import typer
10
+ from rich.console import Console
11
+ from rich.table import Table
12
+
13
+ from esgvoc.apps.test_cv.cv_tester import CVTester
14
+ from esgvoc.core.service.configuration.setting import ServiceSettings
15
+
16
+ app = typer.Typer()
17
+ console = Console()
18
+
19
+
20
+ @app.command()
21
+ def list_projects():
22
+ """List all available CV projects that can be tested."""
23
+ tester = CVTester()
24
+ projects = tester.get_available_projects()
25
+
26
+ table = Table(title="Available CV Projects for Testing")
27
+ table.add_column("Project Name", style="cyan")
28
+ table.add_column("Repository", style="green")
29
+ table.add_column("Default Branch", style="yellow")
30
+ table.add_column("Local Path", style="blue")
31
+
32
+ for project_name in projects:
33
+ config = ServiceSettings.DEFAULT_PROJECT_CONFIGS[project_name]
34
+ table.add_row(project_name, config["github_repo"], config["branch"], config["local_path"])
35
+
36
+ console.print(table)
37
+ console.print(f"\n[blue]Total: {len(projects)} projects available for testing[/blue]")
38
+
39
+
40
+ @app.command()
41
+ def configure(
42
+ project: str = typer.Argument(..., help="Project name to configure for testing"),
43
+ repo_url: Optional[str] = typer.Option(None, "--repo", "-r", help="Custom repository URL"),
44
+ branch: Optional[str] = typer.Option(None, "--branch", "-b", help="Custom branch to test"),
45
+ universe_branch: Optional[str] = typer.Option(None, "--universe-branch", "-u", help="Custom universe branch"),
46
+ sync: bool = typer.Option(True, "--sync/--no-sync", help="Synchronize CVs after configuration"),
47
+ ):
48
+ """
49
+ Configure esgvoc with a specific project for testing.
50
+
51
+ Examples:
52
+ esgvoc test configure obs4mip
53
+ esgvoc test configure cmip6 --branch my-test-branch
54
+ esgvoc test configure cmip6 --universe-branch my-universe-branch
55
+ esgvoc test configure custom --repo https://github.com/me/my-cvs --branch main --universe-branch dev
56
+ """
57
+ tester = CVTester()
58
+
59
+ try:
60
+ # Configure
61
+ if not tester.configure_for_testing(project, repo_url, branch, None, universe_branch):
62
+ raise typer.Exit(1)
63
+
64
+ # Optionally synchronize
65
+ if sync:
66
+ if not tester.synchronize_cvs():
67
+ raise typer.Exit(1)
68
+
69
+ console.print(f"[green]✅ Successfully configured project '{project}' for testing[/green]")
70
+ if not sync:
71
+ console.print("[yellow]Note: CVs not synchronized. Run 'esgvoc test sync' to download.[/yellow]")
72
+
73
+ except Exception as e:
74
+ console.print(f"[red]❌ Configuration failed: {e}[/red]")
75
+ raise typer.Exit(1)
76
+
77
+
78
+ @app.command()
79
+ def sync():
80
+ """Synchronize/download CVs for the currently configured project."""
81
+ tester = CVTester()
82
+
83
+ try:
84
+ if not tester.synchronize_cvs():
85
+ raise typer.Exit(1)
86
+ console.print("[green]✅ CVs synchronized successfully[/green]")
87
+ except Exception as e:
88
+ console.print(f"[red]❌ Synchronization failed: {e}[/red]")
89
+ raise typer.Exit(1)
90
+
91
+
92
+ @app.command()
93
+ def structure(
94
+ path: str = typer.Argument(".", help="Path to CV repository to validate"),
95
+ ):
96
+ """
97
+ Test CV repository structure and file format compliance.
98
+
99
+ Validates:
100
+ - Collection directory structure
101
+ - JSONLD context files
102
+ - Element JSON files
103
+ - project_specs.json references
104
+
105
+ Examples:
106
+ esgvoc test structure .
107
+ esgvoc test structure /path/to/cv/repo
108
+ """
109
+ tester = CVTester()
110
+
111
+ try:
112
+ if not tester.test_repository_structure(path):
113
+ raise typer.Exit(1)
114
+ console.print("[green]✅ Repository structure validation passed[/green]")
115
+ except Exception as e:
116
+ console.print(f"[red]❌ Structure validation failed: {e}[/red]")
117
+ raise typer.Exit(1)
118
+
119
+
120
+ @app.command()
121
+ def api(
122
+ project: str = typer.Argument(..., help="Project name to test API access for"),
123
+ path: str = typer.Argument(".", help="Path to CV repository"),
124
+ debug_terms: bool = typer.Option(True, "--debug-terms/--no-debug-terms", help="Show detailed debugging info for missing terms"),
125
+ ):
126
+ """
127
+ Test esgvoc API access for all repository collections and elements.
128
+
129
+ Validates:
130
+ - Project is accessible via esgvoc API
131
+ - All repository collections are queryable
132
+ - All repository elements are accessible
133
+ - API functions work correctly
134
+
135
+ Examples:
136
+ esgvoc test api obs4mip .
137
+ esgvoc test api cmip6 /path/to/cmip6/repo
138
+ """
139
+ tester = CVTester(debug_missing_terms=debug_terms)
140
+
141
+ try:
142
+ if not tester.test_esgvoc_api_access(project, path):
143
+ raise typer.Exit(1)
144
+ console.print("[green]✅ ESGVoc API access validation passed[/green]")
145
+ except Exception as e:
146
+ console.print(f"[red]❌ API validation failed: {e}[/red]")
147
+ raise typer.Exit(1)
148
+
149
+
150
+ @app.command()
151
+ def run(
152
+ project: str = typer.Argument(..., help="Project name to test"),
153
+ path: Optional[str] = typer.Argument(None, help="Path to CV repository (auto-detected if not provided)"),
154
+ repo_url: Optional[str] = typer.Option(None, "--repo", "-r", help="Custom repository URL"),
155
+ branch: Optional[str] = typer.Option(None, "--branch", "-b", help="Custom branch to test"),
156
+ universe_branch: Optional[str] = typer.Option(None, "--universe-branch", "-u", help="Custom universe branch"),
157
+ debug_terms: bool = typer.Option(True, "--debug-terms/--no-debug-terms", help="Show detailed debugging info for missing terms"),
158
+ ):
159
+ """
160
+ Run complete CV test suite: configure, sync, structure, and API tests.
161
+
162
+ This is the comprehensive test that runs all validation steps:
163
+ 1. Configure esgvoc with the specified project
164
+ 2. Synchronize/download CVs
165
+ 3. Validate repository structure
166
+ 4. Test esgvoc API access
167
+
168
+ Examples:
169
+ esgvoc test run obs4mip
170
+ esgvoc test run cmip6 --branch my-test-branch
171
+ esgvoc test run cmip6 --universe-branch my-universe-branch
172
+ esgvoc test run cmip6 /path/to/custom/repo --branch my-test-branch --universe-branch dev
173
+ esgvoc test run custom --repo https://github.com/me/cvs --branch main --universe-branch main
174
+ """
175
+ tester = CVTester(debug_missing_terms=debug_terms)
176
+
177
+ try:
178
+ success = tester.run_complete_test(project, repo_url, branch, path, None, universe_branch)
179
+ if success:
180
+ console.print(f"[bold green]🎉 All tests passed for project '{project}'![/bold green]")
181
+ else:
182
+ # The detailed failure information is already printed by cv_tester
183
+ raise typer.Exit(1)
184
+ except Exception as e:
185
+ console.print(f"[red]❌ Test suite failed: {e}[/red]")
186
+ raise typer.Exit(1)
187
+ finally:
188
+ tester.cleanup()
189
+
190
+
191
+ @app.command()
192
+ def env(
193
+ command: str = typer.Argument(..., help="Environment mode command: 'configure' or 'test'"),
194
+ project: Optional[str] = typer.Option(None, "--project", "-p", help="Project name (auto-detected if not provided)"),
195
+ repo_url: Optional[str] = typer.Option(
196
+ None, "--repo-url", help="Repository URL (from REPO_URL env var if not provided)"
197
+ ),
198
+ branch: Optional[str] = typer.Option(None, "--branch", help="Branch (from TEST_BRANCH env var if not provided)"),
199
+ universe_branch: Optional[str] = typer.Option(None, "--universe-branch", help="Universe branch (from UNIVERSE_BRANCH env var if not provided)"),
200
+ debug_terms: bool = typer.Option(True, "--debug-terms/--no-debug-terms", help="Show detailed debugging info for missing terms"),
201
+ ):
202
+ """
203
+ Environment variable mode for CI/CD integration and automated testing.
204
+
205
+ Reads configuration from environment variables:
206
+ - REPO_URL: Repository URL to test
207
+ - TEST_BRANCH: Branch to test
208
+ - PROJECT_NAME: Project name (auto-detected if not set)
209
+ - UNIVERSE_BRANCH: Universe branch to test (optional)
210
+ - ESGVOC_LIBRARY_BRANCH: ESGVoc library branch (informational)
211
+
212
+ Examples:
213
+ # Set environment and run
214
+ export REPO_URL=https://github.com/me/obs4MIPs_CVs
215
+ export TEST_BRANCH=test-branch
216
+ export UNIVERSE_BRANCH=my-universe-branch
217
+ esgvoc test env configure
218
+ esgvoc test env test
219
+
220
+ # Or use options
221
+ esgvoc test env configure --project obs4mip --repo-url https://github.com/me/repo --branch main --universe-branch dev
222
+ """
223
+ import os
224
+
225
+ # Get config from environment or options
226
+ final_repo_url = repo_url or os.environ.get("REPO_URL")
227
+ final_branch = branch or os.environ.get("TEST_BRANCH")
228
+ final_universe_branch = universe_branch or os.environ.get("UNIVERSE_BRANCH")
229
+ final_project = project or os.environ.get("PROJECT_NAME")
230
+
231
+ # Auto-detect project if not provided
232
+ if not final_project:
233
+ from esgvoc.apps.test_cv.cv_tester import detect_project_name
234
+
235
+ final_project = detect_project_name()
236
+
237
+ if command == "configure":
238
+ if not final_repo_url or not final_branch:
239
+ console.print("[red]❌ REPO_URL and TEST_BRANCH are required for env configure[/red]")
240
+ console.print("Set environment variables or use --repo-url and --branch options")
241
+ raise typer.Exit(1)
242
+
243
+ # Use configure command
244
+ configure(final_project, final_repo_url, final_branch, final_universe_branch, sync=True)
245
+
246
+ elif command == "test":
247
+ # Use run command
248
+ run(final_project, None, final_repo_url, final_branch, final_universe_branch, debug_terms)
249
+
250
+ else:
251
+ console.print(f"[red]❌ Invalid env command '{command}'. Use 'configure' or 'test'[/red]")
252
+ raise typer.Exit(1)
253
+
254
+
255
+ if __name__ == "__main__":
256
+ app()
257
+
esgvoc/core/constants.py CHANGED
@@ -1,13 +1,16 @@
1
1
  DIRNAME_AND_FILENAME_SEPARATOR = "_"
2
- PROJECT_SPECS_FILENAME = "project_specs.json"
2
+ PROJECT_SPECS_FILENAME = "project_specs.yaml"
3
+ DRS_SPECS_FILENAME = "drs_specs.yaml"
4
+ CATALOG_SPECS_FILENAME = "catalog_specs.yaml"
5
+ ATTRIBUTES_SPECS_FILENAME = "attr_specs.yaml"
3
6
  PROJECT_ID_JSON_KEY = "project_id"
4
7
  CONTEXT_FILENAME = "000_context.jsonld"
5
8
  CONTEXT_JSON_KEY = "@context"
6
- TERM_ID_JSON_KEY = 'id'
7
- COMPOSITE_PARTS_JSON_KEY = 'parts'
8
- COMPOSITE_SEPARATOR_JSON_KEY = 'separator'
9
- PATTERN_JSON_KEY = 'regex'
10
- TERM_TYPE_JSON_KEY = 'type'
11
- DRS_SPECS_JSON_KEY = 'drs_name'
9
+ TERM_ID_JSON_KEY = "id"
10
+ COMPOSITE_PARTS_JSON_KEY = "parts"
11
+ COMPOSITE_SEPARATOR_JSON_KEY = "separator"
12
+ PATTERN_JSON_KEY = "regex"
13
+ TERM_TYPE_JSON_KEY = "type"
14
+ DRS_SPECS_JSON_KEY = "drs_name"
12
15
  SQLITE_FIRST_PK = 1
13
16
  DATA_DESCRIPTOR_JSON_KEY = "@base"
esgvoc/core/data_handler.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  import os
3
2
  import json
4
3
  import logging
@@ -9,11 +8,13 @@ from pyld import jsonld
9
8
  from pydantic import BaseModel, model_validator, ConfigDict
10
9
 
11
10
  from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
11
+
12
12
  # Configure logging
13
13
  _LOGGER = logging.getLogger(__name__)
14
14
 
15
15
  mapping = DATA_DESCRIPTOR_CLASS_MAPPING
16
16
 
17
+
17
18
  def unified_document_loader(uri: str) -> Dict:
18
19
  """Load a document from a local file or a remote URI."""
19
20
  if uri.startswith(("http://", "https://")):
@@ -27,6 +28,7 @@ def unified_document_loader(uri: str) -> Dict:
27
28
  with open(uri, "r") as f:
28
29
  return json.load(f)
29
30
 
31
+
30
32
  class JsonLdResource(BaseModel):
31
33
  uri: str
32
34
  local_path: Optional[str] = None
@@ -40,13 +42,15 @@ class JsonLdResource(BaseModel):
40
42
  local_path = values.get("local_path")
41
43
  if local_path:
42
44
  values["local_path"] = os.path.abspath(local_path) + "/"
43
- jsonld.set_document_loader(lambda uri,options:{
44
- "contextUrl": None, # No special context URL
45
- "documentUrl": uri, # The document's actual URL
46
- "document": unified_document_loader(uri), # The parsed JSON-LD document
47
- })
45
+ jsonld.set_document_loader(
46
+ lambda uri, options: {
47
+ "contextUrl": None, # No special context URL
48
+ "documentUrl": uri, # The document's actual URL
49
+ "document": unified_document_loader(uri), # The parsed JSON-LD document
50
+ }
51
+ )
48
52
  return values
49
-
53
+
50
54
  @cached_property
51
55
  def json_dict(self) -> Dict:
52
56
  """Fetch the original JSON data."""
@@ -58,12 +62,12 @@ class JsonLdResource(BaseModel):
58
62
  """Expand the JSON-LD data."""
59
63
  _LOGGER.debug(f"Expanding JSON-LD data for {self.uri}")
60
64
  return jsonld.expand(self.uri, options={"base": self.uri})
61
-
65
+
62
66
  @cached_property
63
67
  def context(self) -> Dict:
64
68
  """Fetch and return the JSON content of the '@context'."""
65
-
66
- context_data =JsonLdResource(uri="/".join(self.uri.split("/")[:-1])+"/"+self.json_dict["@context"])
69
+
70
+ context_data = JsonLdResource(uri="/".join(self.uri.split("/")[:-1]) + "/" + self.json_dict["@context"])
67
71
  # Works only in relative path declaration
68
72
 
69
73
  context_value = context_data.json_dict
@@ -83,9 +87,7 @@ class JsonLdResource(BaseModel):
83
87
  def normalized(self) -> str:
84
88
  """Normalize the JSON-LD data."""
85
89
  _LOGGER.info(f"Normalizing JSON-LD data for {self.uri}")
86
- return jsonld.normalize(
87
- self.uri, options={"algorithm": "URDNA2015", "format": "application/n-quads"}
88
- )
90
+ return jsonld.normalize(self.uri, options={"algorithm": "URDNA2015", "format": "application/n-quads"})
89
91
 
90
92
  @cached_property
91
93
  def python(self) -> Optional[Any]:
@@ -120,14 +122,14 @@ class JsonLdResource(BaseModel):
120
122
 
121
123
 
122
124
  if __name__ == "__main__":
123
- ## For Universe
124
- #online
125
+ ## For Universe
126
+ # online
125
127
  # d = Data(uri = "https://espri-mod.github.io/mip-cmor-tables/activity/cmip.json")
126
128
  # print(d.info)
127
- #offline
128
- #print(Data(uri = ".cache/repos/mip-cmor-tables/activity/cmip.json").info)
129
- ## for Project
130
- #d = Data(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/activity_id/cmip.json")
131
- #print(d.info)
132
- #offline
133
- print(JsonLdResource(uri = ".cache/repos/CMIP6Plus_CVs/activity_id/cmip.json").info)
129
+ # offline
130
+ # print(Data(uri = ".cache/repos/mip-cmor-tables/activity/cmip.json").info)
131
+ ## for Project
132
+ # d = Data(uri = "https://espri-mod.github.io/CMIP6Plus_CVs/activity_id/cmip.json")
133
+ # print(d.info)
134
+ # offline
135
+ print(JsonLdResource(uri=".cache/repos/CMIP6Plus_CVs/activity_id/cmip.json").info)
esgvoc/core/db/connection.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  from pathlib import Path
3
3
 
4
+ import yaml
4
5
  from sqlalchemy import Engine
5
6
  from sqlmodel import Session, create_engine
6
7
 
@@ -31,3 +32,9 @@ class DBConnection:
31
32
 
32
33
  def read_json_file(json_file_path: Path) -> dict:
33
34
  return json.loads(json_file_path.read_text())
35
+
36
+
37
+ def read_yaml_file(yaml_file_path: Path) -> dict:
38
+ with open(yaml_file_path, 'r') as file:
39
+ result = yaml.safe_load(file)
40
+ return result
esgvoc/core/db/project_ingestion.py CHANGED
@@ -8,7 +8,7 @@ import esgvoc.core.constants
8
8
  import esgvoc.core.db.connection as db
9
9
  import esgvoc.core.service as service
10
10
  from esgvoc.core.data_handler import JsonLdResource
11
- from esgvoc.core.db.connection import DBConnection, read_json_file
11
+ from esgvoc.core.db.connection import DBConnection, read_json_file, read_yaml_file
12
12
  from esgvoc.core.db.models.mixins import TermKind
13
13
  from esgvoc.core.db.models.project import PCollection, Project, PTerm
14
14
  from esgvoc.core.exceptions import EsgvocDbError
@@ -88,7 +88,10 @@ def ingest_collection(collection_dir_path: Path, project: Project, project_db_se
88
88
  term_kind_collection = term_kind
89
89
 
90
90
  except Exception as e:
91
- _LOGGER.warning(f"Unable to read term {term_file_path}. Skip.\n{str(e)}")
91
+ _LOGGER.warning(
92
+ f"Unable to read term file {term_file_path} in collection '{collection_id}' "
93
+ + f"of project '{project.id}'. Skip.\n{str(e)}"
94
+ )
92
95
  continue
93
96
  try:
94
97
  term = PTerm(
@@ -99,14 +102,28 @@ def ingest_collection(collection_dir_path: Path, project: Project, project_db_se
99
102
  )
100
103
  project_db_session.add(term)
101
104
  except Exception as e:
105
+ # Enhanced error reporting for term validation failures
102
106
  _LOGGER.error(
103
- f"fail to find term {term_id} in data descriptor {data_descriptor_id} "
104
- + f"for the collection {collection_id} of the project {project.id}. Skip {term_id}.\n{str(e)}"
107
+ f"Failed to create term '{term_id}' in collection '{collection_id}' "
108
+ + f"of project '{project.id}' from file '{term_file_path}': {str(e)}"
105
109
  )
106
110
  continue
107
111
  if term_kind_collection:
108
112
  collection.term_kind = term_kind_collection
109
- project_db_session.add(collection)
113
+ else:
114
+ # If we couldn't determine a term kind, use PLAIN as default and log warning
115
+ _LOGGER.warning(
116
+ f"No term kind determined for collection '{collection_id}' in project '{project.id}'. "
117
+ + "Using PLAIN as default. This might indicate empty collection or processing errors."
118
+ )
119
+ collection.term_kind = TermKind.PLAIN
120
+
121
+ try:
122
+ project_db_session.add(collection)
123
+ except Exception as e:
124
+ error_context = f"Failed to add collection '{collection_id}' to project '{project.id}'"
125
+ _LOGGER.error(f"{error_context}: {str(e)}")
126
+ raise EsgvocDbError(f"{error_context}: {str(e)}") from e
110
127
 
111
128
 
112
129
  def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash: str):
@@ -119,15 +136,23 @@ def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash:
119
136
 
120
137
  with project_connection.create_session() as project_db_session:
121
138
  project_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.PROJECT_SPECS_FILENAME)
139
+ drs_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.DRS_SPECS_FILENAME)
140
+ catalog_specs_file_path = project_dir_path.joinpath(esgvoc.core.constants.CATALOG_SPECS_FILENAME)
122
141
  try:
123
- project_json_specs = read_json_file(project_specs_file_path)
124
- project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
142
+ raw_project_specs = read_yaml_file(project_specs_file_path)
143
+ project_id = raw_project_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
144
+ raw_drs_specs = read_yaml_file(drs_specs_file_path)
145
+ project_specs = raw_project_specs
146
+ project_specs['drs_specs'] = raw_drs_specs
147
+ if catalog_specs_file_path.exists():
148
+ raw_catalog_specs = read_yaml_file(catalog_specs_file_path)
149
+ project_specs['catalog_specs'] = raw_catalog_specs
125
150
  except Exception as e:
126
- msg = f"unable to read project specs file {project_specs_file_path}"
151
+ msg = f"unable to read specs files in {project_dir_path}"
127
152
  _LOGGER.fatal(msg)
128
153
  raise EsgvocDbError(msg) from e
129
154
 
130
- project = Project(id=project_id, specs=project_json_specs, git_hash=git_hash)
155
+ project = Project(id=project_id, specs=project_specs, git_hash=git_hash)
131
156
  project_db_session.add(project)
132
157
 
133
158
  for collection_dir_path in project_dir_path.iterdir():
esgvoc/core/db/universe_ingestion.py CHANGED
@@ -15,6 +15,7 @@ from esgvoc.core.exceptions import EsgvocDbError
15
15
  from esgvoc.core.service.data_merger import DataMerger
16
16
 
17
17
  _LOGGER = logging.getLogger(__name__)
18
+ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
18
19
 
19
20
 
20
21
  def infer_term_kind(json_specs: dict) -> TermKind:
@@ -81,7 +82,6 @@ def ingest_metadata_universe(connection, git_hash):
81
82
 
82
83
  def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnection) -> None:
83
84
  data_descriptor_id = data_descriptor_path.name
84
-
85
85
  context_file_path = data_descriptor_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
86
86
  try:
87
87
  context = read_json_file(context_file_path)
@@ -108,7 +108,6 @@ def ingest_data_descriptor(data_descriptor_path: Path, connection: db.DBConnecti
108
108
  json_specs = DataMerger(
109
109
  data=JsonLdResource(uri=str(term_file_path)), locally_available=locally_available
110
110
  ).merge_linked_json()[-1]
111
-
112
111
  term_kind = infer_term_kind(json_specs)
113
112
  term_id = json_specs["id"]
114
113