esgvoc 1.0.1__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/__init__.py +1 -1
- esgvoc/api/__init__.py +0 -6
- esgvoc/api/data_descriptors/__init__.py +6 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/experiment.py +2 -2
- esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/vertical_label.py +2 -2
- esgvoc/api/project_specs.py +48 -130
- esgvoc/api/projects.py +104 -63
- esgvoc/apps/drs/generator.py +47 -42
- esgvoc/apps/drs/validator.py +22 -38
- esgvoc/apps/jsg/json_schema_generator.py +252 -136
- esgvoc/apps/jsg/templates/template.jinja +249 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/cv_tester.py +1368 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/config.py +671 -86
- esgvoc/cli/drs.py +39 -21
- esgvoc/cli/main.py +2 -0
- esgvoc/cli/test_cv.py +257 -0
- esgvoc/core/constants.py +10 -7
- esgvoc/core/data_handler.py +24 -22
- esgvoc/core/db/connection.py +7 -0
- esgvoc/core/db/project_ingestion.py +34 -9
- esgvoc/core/db/universe_ingestion.py +1 -2
- esgvoc/core/service/configuration/setting.py +192 -21
- esgvoc/core/service/data_merger.py +1 -1
- esgvoc/core/service/state.py +18 -2
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -1
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/RECORD +40 -29
- esgvoc/apps/jsg/cmip6_template.json +0 -74
- /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
- {esgvoc-1.0.1.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Example usage of the CV Testing Application
|
|
4
|
+
|
|
5
|
+
This script demonstrates how to use the CVTester class programmatically.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from .cv_tester import CVTester
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def example_test_default_project():
    """Example: run the complete test for obs4mip with no branch or URL override.

    The tester's cleanup() is always invoked, whether the run succeeds,
    fails, or raises.
    """
    console.print("[bold blue]Example 1: Testing default obs4mip project[/bold blue]")

    cv_tester = CVTester()
    try:
        # repo_path="." assumes the current directory is the CV repository.
        passed = cv_tester.run_complete_test(project_name="obs4mip", repo_path=".")
        console.print(
            "[green]✅ Default project test completed successfully[/green]"
            if passed
            else "[red]❌ Default project test failed[/red]"
        )
    finally:
        cv_tester.cleanup()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def example_test_custom_branch():
    """Example: run the complete test for obs4mip against a non-default branch.

    The tester's cleanup() is always invoked once the run finishes or raises.
    """
    console.print("[bold blue]Example 2: Testing obs4mip with custom branch[/bold blue]")

    cv_tester = CVTester()
    # Only the project branch is overridden here.
    run_options = {
        "project_name": "obs4mip",
        "branch": "test-branch",
        "repo_path": ".",
    }
    try:
        if cv_tester.run_complete_test(**run_options):
            verdict = "[green]✅ Custom branch test completed successfully[/green]"
        else:
            verdict = "[red]❌ Custom branch test failed[/red]"
        console.print(verdict)
    finally:
        cv_tester.cleanup()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def example_test_universe_branch_override():
    """Example: run the complete test for input4mip with a custom universe branch.

    Both the project branch and the universe branch are passed explicitly.
    The tester's cleanup() is always invoked afterwards.
    """
    console.print("[bold blue]Example 2b: Testing input4mip with custom universe branch[/bold blue]")

    cv_tester = CVTester()
    run_options = {
        "project_name": "input4mip",
        "branch": "esgvoc",  # project branch
        "universe_branch": "esgvoc_dev",  # universe branch override
        "repo_path": ".",
    }
    try:
        passed = cv_tester.run_complete_test(**run_options)
        if not passed:
            console.print("[red]❌ Universe branch override test failed[/red]")
        else:
            console.print("[green]✅ Universe branch override test completed successfully[/green]")
    finally:
        cv_tester.cleanup()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def example_test_custom_repo():
    """Example: run the complete test against a custom repository URL.

    The obs4mip project name is still passed alongside the custom URL.
    The tester's cleanup() is always invoked afterwards.
    """
    console.print("[bold blue]Example 3: Testing custom repository[/bold blue]")

    cv_tester = CVTester()
    try:
        # Custom repository URL plus an explicit branch.
        passed = cv_tester.run_complete_test(
            project_name="obs4mip",
            repo_url="https://github.com/my-org/my-custom-cvs",
            branch="main",
            repo_path=".",
        )
        message = (
            "[green]✅ Custom repository test completed successfully[/green]"
            if passed
            else "[red]❌ Custom repository test failed[/red]"
        )
        console.print(message)
    finally:
        cv_tester.cleanup()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def example_test_custom_repo_and_universe():
    """Example: run the complete test with a custom repository and universe branch.

    Passes a repository URL, a project branch and a universe branch together.
    The tester's cleanup() is always invoked afterwards.
    """
    console.print("[bold blue]Example 3b: Testing custom repository with custom universe[/bold blue]")

    cv_tester = CVTester()
    run_options = {
        "project_name": "obs4mip",
        "repo_url": "https://github.com/my-org/my-custom-cvs",
        "branch": "main",  # project branch
        "universe_branch": "development",  # universe branch override
        "repo_path": ".",
    }
    try:
        if cv_tester.run_complete_test(**run_options):
            console.print("[green]✅ Custom repository and universe test completed successfully[/green]")
        else:
            console.print("[red]❌ Custom repository and universe test failed[/red]")
    finally:
        cv_tester.cleanup()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def example_individual_tests():
    """Example: drive the individual test steps one at a time.

    The steps run in order (configure, synchronize, repository structure,
    API access). The first step that returns a falsy value prints its failure
    message and ends the example. The tester's cleanup() always runs.
    """
    console.print("[bold blue]Example 4: Running individual test components[/bold blue]")

    cv_tester = CVTester()

    # (announcement, step to execute, message printed when the step fails)
    pipeline = (
        (
            "Step 1: Configuring...",
            lambda: cv_tester.configure_for_testing(
                project_name="obs4mip",
                branch="main",  # custom project branch
                universe_branch="esgvoc_dev",  # custom universe branch
            ),
            "[red]Configuration failed[/red]",
        ),
        (
            "Step 2: Synchronizing CVs...",
            lambda: cv_tester.synchronize_cvs(),
            "[red]Synchronization failed[/red]",
        ),
        (
            "Step 3: Testing repository structure...",
            lambda: cv_tester.test_repository_structure("."),
            "[red]Structure test failed[/red]",
        ),
        (
            "Step 4: Testing API access...",
            lambda: cv_tester.test_esgvoc_api_access("obs4mip", "."),
            "[red]API test failed[/red]",
        ),
    )

    try:
        for announcement, run_step, failure_message in pipeline:
            console.print(announcement)
            if not run_step():
                console.print(failure_message)
                return

        console.print("[green]✅ All individual tests completed successfully[/green]")
    finally:
        cv_tester.cleanup()
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def example_list_available_projects():
    """Example: print the projects returned by CVTester.get_available_projects()."""
    console.print("[bold blue]Example 5: Listing available projects[/bold blue]")

    cv_tester = CVTester()
    available = cv_tester.get_available_projects()

    # NOTE(review): the other examples call cleanup() on the tester; this one
    # does not. Confirm that no cleanup is needed for a listing-only tester.
    console.print(f"Available projects ({len(available)}):")
    for name in available:
        console.print(f" • {name}")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def main():
    """List the available projects, then print example invocations.

    Only example_list_available_projects() is executed. The remaining
    examples are shown as text and are not run.
    """
    console.print("[bold green]CV Testing Application Examples[/bold green]\n")

    # The only example that is actually executed.
    example_list_available_projects()
    console.print()

    # The remaining examples would need real CV repositories to work.
    console.print("[yellow]Note: The following examples require actual CV repositories[/yellow]\n")

    # Shown for reference only; nothing below is executed.
    console.print("[bold blue]Example configurations:[/bold blue]")
    invocations = (
        "1. Test default project: CVTester().run_complete_test('obs4mip')",
        "2. Test custom branch: CVTester().run_complete_test('obs4mip', branch='test-branch')",
        "3. Test custom universe: CVTester().run_complete_test('input4mip', branch='esgvoc', universe_branch='esgvoc_dev')",
        "4. Test custom repo: CVTester().run_complete_test('obs4mip', repo_url='https://github.com/...', branch='main')",
        "5. Test custom repo + universe: CVTester().run_complete_test('obs4mip', repo_url='https://github.com/...', branch='main', universe_branch='dev')",
        "6. Individual tests: configure_for_testing(universe_branch='dev') -> synchronize_cvs() -> test_repository_structure() -> test_esgvoc_api_access()",
    )
    for invocation in invocations:
        console.print(invocation)


if __name__ == "__main__":
    main()
|
|
216
|
+
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Variable Registry (VR) App - A simplified tool for creating nested structures from branded variables.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- VRApp: Main application class for querying and structuring branded variables
|
|
6
|
+
- create_nested_structure: Generic function for creating nested structures
|
|
7
|
+
- variable_registry_structure: Function for creating the standard variable registry format
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .vr_app import VRApp, create_nested_structure, variable_registry_structure
|
|
11
|
+
|
|
12
|
+
# Public API of this package: the names imported from .vr_app above.
__all__ = ["VRApp", "create_nested_structure", "variable_registry_structure"]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Script to build the variable registry structure from branded variables.
|
|
4
|
+
This creates the nested JSON structure organized by CF Standard Name and Variable Root Name.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from vr_app import VRApp
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def build_variable_registry():
    """
    Build the complete variable registry structure and export it to JSON.

    Fetches all branded variables through VRApp, prints summary statistics,
    then builds two registries: one for every term and one restricted to
    realm "atmos". Both are exported to the current working directory as
    ``variable_registry_complete.json`` and ``variable_registry_atmos.json``.

    Returns:
        The complete registry dictionary (the one built from all terms).
    """
    print("Building Variable Registry...")
    print("=" * 40)

    with VRApp() as vr_app:
        # Get all branded variables
        print("Fetching all branded variables from the universe...")
        all_terms = vr_app.get_all_branded_variables()
        print(f"Found {len(all_terms)} total terms")

        # Get statistics
        stats = vr_app.get_statistics(all_terms)
        print("\nStatistics:")
        print(f" Total terms: {stats['total_terms']}")
        print(f" Unique CF Standard Names: {stats['unique_cf_standard_names']}")
        print(f" Unique Variable Root Names: {stats['unique_variable_root_names']}")
        print(f" Unique Realms: {stats['unique_realms']}")

        # Create complete variable registry. The terms fetched above are
        # reused so the full term list is not fetched a second time.
        print("\nCreating complete variable registry...")
        registry_all = vr_app.create_variable_registry(terms=all_terms)

        # Create atmospheric variables registry
        print("Creating atmospheric variables registry...")
        registry_atmos = vr_app.create_variable_registry(filters={"realm": "atmos"})

        # Export structures
        print("\nExporting registry structures...")
        vr_app.export_to_json(registry_all, "variable_registry_complete.json", indent=2)
        vr_app.export_to_json(registry_atmos, "variable_registry_atmos.json", indent=2)

        print("\n" + "=" * 50)
        print("VARIABLE REGISTRY BUILD COMPLETED!")
        print("=" * 50)
        print("\nGenerated files:")
        print(" - variable_registry_complete.json (all terms)")
        print(" - variable_registry_atmos.json (atmospheric terms only)")

        # Show sample structure
        standard_names = registry_all.get("standard_name", {})
        print(f"\nRegistry contains {len(standard_names)} CF Standard Names")
        sample_names = list(standard_names.keys())[:5]
        print(f"Sample CF Standard Names: {sample_names}")

        return registry_all


if __name__ == "__main__":
    try:
        build_variable_registry()
    except Exception as e:
        print(f"Error building variable registry: {e}")
        import traceback

        traceback.print_exc()
        # Report the failure to the calling shell instead of exiting with 0.
        raise SystemExit(1) from e
|
|
71
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Simple example showing how to use the VR App.
|
|
4
|
+
This demonstrates basic usage patterns for the Variable Registry application.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from vr_app import VRApp
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main():
    """
    Walk through the common VR App operations.

    Prints statistics, exports an atmosphere-only registry and a custom
    nested structure to JSON files in the current directory, then lists up
    to three branding suffixes for atmospheric air-temperature terms.
    """
    print("VR App Usage Example")
    print("=" * 20)

    # The context manager ensures the app is closed on exit.
    with VRApp() as app:
        # 1. Basic statistics
        print("\n1. Getting statistics...")
        summary = app.get_statistics()
        print(f" Total terms: {summary['total_terms']}")
        print(f" Unique CF Standard Names: {summary['unique_cf_standard_names']}")
        print(f" Unique Variable Root Names: {summary['unique_variable_root_names']}")

        # 2. Variable registry restricted to atmospheric variables
        print("\n2. Creating variable registry for atmospheric variables...")
        atmos_only = {"realm": "atmos"}
        app.export_to_json(app.create_variable_registry(filters=atmos_only), "example_atmos_registry.json")
        print(" Exported to: example_atmos_registry.json")

        # 3. Custom nested structure
        print("\n3. Creating custom structure grouped by realm and CF standard name...")
        grouping = ["realm", "cf_standard_name"]
        level_metadata = {0: ["cf_units"], 1: ["cf_sn_status"]}
        nested = app.create_custom_nested_structure(group_by_keys=grouping, metadata_config=level_metadata)
        app.export_to_json(nested, "example_custom_structure.json")
        print(" Exported to: example_custom_structure.json")

        # 4. Filter by specific variables
        print("\n4. Filtering specific variables...")
        selection = {"cf_standard_name": "air_temperature", "realm": "atmos"}
        matches = app.get_branded_variables_subset(selection)
        print(f" Found {len(matches)} air temperature terms in atmosphere")

        # Show a few sample branding suffixes
        if matches:
            print(" Sample branding suffixes:")
            for match in matches[:3]:
                print(f" - {match.branding_suffix_name}")

        print("\n" + "=" * 40)
        print("Example completed successfully!")
        print("Generated files:")
        for generated in ("example_atmos_registry.json", "example_custom_structure.json"):
            print(f" - {generated}")


if __name__ == "__main__":
    main()
|
|
60
|
+
|
esgvoc/apps/vr/vr_app.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from esgvoc import api
|
|
6
|
+
from esgvoc.api import search
|
|
7
|
+
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
8
|
+
from esgvoc.api.data_descriptors.known_branded_variable import KnownBrandedVariable
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def create_nested_structure(
    terms: List[KnownBrandedVariable],
    group_by_keys: List[str],
    metadata_config: Optional[Dict[int, List[str]]] = None,
) -> Dict[str, Any]:
    """
    Create a nested structure from a list of terms using ordered grouping keys.

    Terms are grouped by the first key, each group by the second key, and so
    on. A term whose value for the current key is missing or None is left out
    of that level and everything below it.

    Args:
        terms: List of KnownBrandedVariable terms. Each term must expose the
            grouping fields as attributes and provide ``model_dump()``.
        group_by_keys: Ordered sequence of field names to group by. It is
            not modified.
        metadata_config: Optional dict mapping a group level (0-based index
            into ``group_by_keys``) to the field names copied onto each group
            at that level. Format: {level_index: [field_names]}. Values are
            taken from the first term seen in each group.

    Returns:
        Nested dictionary keyed by group values. At the deepest level a group
        without metadata maps directly to the list of dumped terms; a group
        with metadata maps to a dict holding the metadata fields plus an
        "items" list. Returns {} when ``terms`` or ``group_by_keys`` is empty.
    """
    if not terms or not group_by_keys:
        return {}

    metadata_config = metadata_config or {}

    def _dump(group_terms: List[KnownBrandedVariable]) -> List[Dict[str, Any]]:
        return [term.model_dump() for term in group_terms]

    def _build_nested_dict(
        current_terms: List[KnownBrandedVariable], remaining_keys: List[str], level: int
    ) -> Dict[str, Any]:
        # remaining_keys is never empty here: leaf groups are dumped by the
        # caller instead of recursing.
        current_key, deeper_keys = remaining_keys[0], remaining_keys[1:]
        has_metadata = level in metadata_config

        grouped = defaultdict(list)
        metadata_by_group = {}

        for term in current_terms:
            group_value = getattr(term, current_key, None)
            if group_value is None:
                continue
            grouped[group_value].append(term)

            # Metadata comes from the first term encountered in each group.
            if has_metadata and group_value not in metadata_by_group:
                metadata_by_group[group_value] = {
                    meta_field: getattr(term, meta_field, None) for meta_field in metadata_config[level]
                }

        result = {}
        for group_value, group_terms in grouped.items():
            if has_metadata:
                node = dict(metadata_by_group[group_value])
                if deeper_keys:
                    # Child group keys share the dict with the metadata fields.
                    node.update(_build_nested_dict(group_terms, deeper_keys, level + 1))
                else:
                    node["items"] = _dump(group_terms)
            elif deeper_keys:
                node = _build_nested_dict(group_terms, deeper_keys, level + 1)
            else:
                node = _dump(group_terms)
            result[group_value] = node

        return result

    # list() takes a private copy and also accepts tuples or other sequences.
    return _build_nested_dict(terms, list(group_by_keys), 0)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def variable_registry_structure(terms: List[KnownBrandedVariable]) -> Dict[str, Any]:
    """
    Build the variable registry layout: CF Standard Name, then Variable Root Name.

    Args:
        terms: List of KnownBrandedVariable terms

    Returns:
        A dictionary with a single "standard_name" key. Each standard name
        holds "units", "sn_status" and a "variable_root_name" mapping. Each
        root name holds "var_def_qualifier" and a "branding_suffix" mapping
        keyed by the term's ``branding_suffix_name``.
    """
    standard_name_fields = ["cf_units", "cf_sn_status"]  # attached at the CF Standard Name level
    root_name_fields = ["var_def_qualifier"]  # attached at the Variable Root Name level

    grouped = create_nested_structure(
        terms,
        ["cf_standard_name", "variable_root_name"],
        {0: standard_name_fields, 1: root_name_fields},
    )

    def _brand_entry(dumped: Dict[str, Any]) -> Dict[str, Any]:
        # Pick the registry fields out of one dumped term.
        return {
            "brand_description": dumped.get("description", ""),
            "bn_status": dumped.get("bn_status", ""),
            "dimensions": dumped.get("dimensions", []),
            "cell_methods": dumped.get("cell_methods", ""),
            "cell_measures": dumped.get("cell_measures", ""),
            "history": dumped.get("history", ""),
            "temporal_label": dumped.get("temporal_label", ""),
            "vertical_label": dumped.get("vertical_label", ""),
            "horizontal_label": dumped.get("horizontal_label", ""),
            "area_label": dumped.get("area_label", ""),
            "realm": dumped.get("realm", ""),
        }

    def _collect_suffixes(root_node: Dict[str, Any]) -> Dict[str, Any]:
        # Gather branding suffixes from every list-valued child of a root-name node.
        suffixes = {}
        for child_key, child in root_node.items():
            if child_key == "var_def_qualifier" or not isinstance(child, list):
                continue
            for dumped in child:
                if not isinstance(dumped, dict):
                    continue
                suffix_key = dumped.get("branding_suffix_name", "")
                if suffix_key:
                    suffixes[suffix_key] = _brand_entry(dumped)
        return suffixes

    by_standard_name = {}
    for std_name, std_node in grouped.items():
        if not isinstance(std_node, dict):
            continue

        root_names = {}
        for var_name, var_node in std_node.items():
            # Skip the metadata fields stored next to the root-name groups.
            if var_name in standard_name_fields or not isinstance(var_node, dict):
                continue
            root_names[var_name] = {
                "var_def_qualifier": var_node.get("var_def_qualifier", ""),
                "branding_suffix": _collect_suffixes(var_node),
            }

        by_standard_name[std_name] = {
            "units": std_node.get("cf_units", ""),
            "sn_status": std_node.get("cf_sn_status", ""),
            "variable_root_name": root_names,
        }

    return {"standard_name": by_standard_name}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class VRApp:
    """
    Variable Registry (VR) App for creating nested structures from branded variables.

    This app allows querying known_branded_variable terms from the universe and
    transforming them into nested JSON structures with customizable grouping.
    It can be used as a context manager, which closes the universe session on exit.
    """

    def __init__(self):
        # NOTE(review): within this class the session is only closed in
        # __exit__; no other method reads it.
        self.universe_session = search.get_universe_session()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the with-block propagates.
        if self.universe_session:
            self.universe_session.close()

    def get_all_branded_variables(self) -> List[DataDescriptor]:
        """
        Get all known_branded_variable terms from the universe.

        Returns:
            List of KnownBrandedVariable terms. If the lookup raises, the
            error is printed and an empty list is returned.
        """
        try:
            return api.get_all_terms_in_data_descriptor("known_branded_variable")
        except Exception as e:
            print(f"Error fetching branded variables: {e}")
            return []

    def get_branded_variables_subset(self, filters: Dict[str, Any]) -> List[KnownBrandedVariable]:
        """
        Get a subset of known_branded_variable terms based on filters.

        Args:
            filters: Dictionary of field names and values to filter by. A
                list value matches when the term's value is one of its
                elements; any other value must compare equal. A missing
                attribute is treated as None. An empty or None ``filters``
                selects every term.

        Returns:
            List of filtered KnownBrandedVariable terms
        """
        all_terms = self.get_all_branded_variables()
        if not filters:
            return list(all_terms)

        def _matches(term: Any) -> bool:
            for field, expected in filters.items():
                actual = getattr(term, field, None)
                if isinstance(expected, list):
                    if actual not in expected:
                        return False
                elif actual != expected:
                    return False
            return True

        return [term for term in all_terms if _matches(term)]

    def _resolve_terms(
        self, terms: Optional[List[KnownBrandedVariable]], filters: Optional[Dict[str, Any]]
    ) -> List[KnownBrandedVariable]:
        """Return ``terms`` if given, else fetch them, applying ``filters`` when present."""
        if terms is not None:
            return terms
        if filters:
            return self.get_branded_variables_subset(filters)
        return self.get_all_branded_variables()

    def create_custom_nested_structure(
        self,
        terms: Optional[List[KnownBrandedVariable]] = None,
        group_by_keys: Optional[List[str]] = None,
        metadata_config: Optional[Dict[int, List[str]]] = None,
        filters: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Create a custom nested structure.

        Args:
            terms: Optional list of terms. If None, fetches all terms
            group_by_keys: List of field names to group by. Defaults to
                ["cf_standard_name", "variable_root_name"] when empty or None
            metadata_config: Optional metadata configuration, keyed by group level
            filters: Optional filters to apply when fetching terms. Ignored
                when ``terms`` is given

        Returns:
            Nested dictionary structure
        """
        terms = self._resolve_terms(terms, filters)

        if not group_by_keys:
            group_by_keys = ["cf_standard_name", "variable_root_name"]

        return create_nested_structure(terms, group_by_keys, metadata_config)

    def create_variable_registry(
        self, terms: Optional[List[KnownBrandedVariable]] = None, filters: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Create the variable registry structure with CF Standard Name and VariableRootName grouping.

        Args:
            terms: Optional list of terms. If None, fetches all terms
            filters: Optional filters to apply when fetching terms. Ignored
                when ``terms`` is given

        Returns:
            Nested dictionary with the variable registry structure format
        """
        return variable_registry_structure(self._resolve_terms(terms, filters))

    def export_to_json(self, structure: Dict[str, Any], filename: str, indent: int = 2) -> None:
        """
        Export a nested structure to a JSON file.

        Any error while writing is printed rather than raised, so callers
        are not told about a failed export.

        Args:
            structure: The nested dictionary structure to export
            filename: Output filename
            indent: JSON indentation level
        """
        try:
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(structure, f, indent=indent, ensure_ascii=False)
            print(f"Structure exported to {filename}")
        except Exception as e:
            print(f"Error exporting to JSON: {e}")

    @staticmethod
    def _tally(values) -> Dict[Any, int]:
        """Count occurrences of each value, keeping first-seen order."""
        counts: Dict[Any, int] = {}
        for value in values:
            counts[value] = counts.get(value, 0) + 1
        return counts

    def get_statistics(self, terms: Optional[List[KnownBrandedVariable]] = None) -> Dict[str, Any]:
        """
        Get statistics about the branded variables.

        Args:
            terms: Optional list of terms. If None, fetches all terms

        Returns:
            Dictionary with the total term count, the number of unique CF
            standard names, variable root names and realms, and per-value
            counts of ``bn_status`` and ``realm``
        """
        if terms is None:
            terms = self.get_all_branded_variables()

        return {
            "total_terms": len(terms),
            "unique_cf_standard_names": len({term.cf_standard_name for term in terms}),
            "unique_variable_root_names": len({term.variable_root_name for term in terms}),
            "unique_realms": len({term.realm for term in terms}),
            "status_distribution": self._tally(term.bn_status for term in terms),
            "realm_distribution": self._tally(term.realm for term in terms),
        }
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def main():
    """
    Demonstrate the VR App: print statistics and export two JSON structures.

    Writes ``variable_registry_atmos.json`` (registry restricted to realm
    "atmos") and ``custom_structure.json`` (terms grouped by realm, then CF
    standard name) to the current directory.
    """
    with VRApp() as app:
        # Headline statistics
        summary = app.get_statistics()
        print(f"Total terms: {summary['total_terms']}")
        print(f"Unique CF Standard Names: {summary['unique_cf_standard_names']}")
        print(f"Unique Variable Root Names: {summary['unique_variable_root_names']}")

        # Variable registry for a subset, exported to JSON
        atmos_only = {"realm": "atmos"}
        atmos_registry = app.create_variable_registry(filters=atmos_only)
        app.export_to_json(atmos_registry, "variable_registry_atmos.json")

        # Custom structure with per-level metadata, exported to JSON
        grouping = ["realm", "cf_standard_name"]
        level_metadata = {0: ["bn_status"], 1: ["cf_units", "cf_sn_status"]}
        custom = app.create_custom_nested_structure(
            group_by_keys=grouping,
            metadata_config=level_metadata,
        )
        app.export_to_json(custom, "custom_structure.json")


if __name__ == "__main__":
    main()
|