esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
@@ -0,0 +1,277 @@
1
+ """
2
+ Main validator interface for NetCDF global attributes.
3
+
4
+ This module provides the high-level API for validating NetCDF global attributes
5
+ against project specifications loaded from the esgvoc database.
6
+ """
7
+
8
+ from typing import Optional, Dict, Any, List
9
+
10
+ import esgvoc.api.projects as projects
11
+ from esgvoc.api.project_specs import AttributeSpecification
12
+ from esgvoc.core.exceptions import EsgvocNotFoundError
13
+ from .models import (
14
+ NetCDFHeader,
15
+ NetCDFHeaderParser,
16
+ ValidationReport,
17
+ ValidationSeverity,
18
+ )
19
+ from .models.validator import GlobalAttributeValidator
20
+
21
+
22
class GAValidator:
    """
    High-level entry point of the GA (Global Attributes) application.

    An instance is bound to one project: it reads that project's attribute
    specifications through ``esgvoc.api.projects`` and hands the actual checks
    over to a ``GlobalAttributeValidator``.
    """

    def __init__(self, project_id: str = "cmip6"):
        """
        Build a validator for one project.

        :param project_id: Project identifier for validation
        """
        self.project_id = project_id

        # The specifications come first: the inner validator is built from them.
        self.attribute_specs = self._load_from_database()
        self.validator = GlobalAttributeValidator(self.attribute_specs, project_id)

    @staticmethod
    def _spec_name(spec) -> str:
        """Return the attribute name of a specification (field name, else source collection)."""
        return spec.field_name or spec.source_collection

    def _load_from_database(self) -> AttributeSpecification:
        """
        Fetch the attribute specifications of ``self.project_id``.

        :return: The attribute specifications of the project
        :raises EsgvocNotFoundError: If the project is unknown
        :raises ValueError: If the project has no attribute specifications
        """
        project = projects.get_project(self.project_id)
        if project is None:
            raise EsgvocNotFoundError(f"Project '{self.project_id}' not found in database")

        specs = project.attr_specs
        if specs is None:
            raise ValueError(f"Project '{self.project_id}' has no attribute specifications")
        return specs

    def _parse_failure_report(self, error: Exception, filename: Optional[str]) -> ValidationReport:
        """Build the invalid report returned when the ncdump output cannot be parsed."""
        report = ValidationReport(filename=filename, project_id=self.project_id, is_valid=False)
        report.add_issue(
            {
                "attribute_name": "parse_error",
                "severity": ValidationSeverity.ERROR,
                "message": f"Failed to parse ncdump output: {error!s}",
                "actual_value": None,
                "expected_value": None,
                "source_collection": None,
            }
        )
        return report

    def validate_from_ncdump(self, ncdump_output: str, filename: Optional[str] = None) -> ValidationReport:
        """
        Validate the global attributes found in the output of ``ncdump``.

        A parsing failure does not raise: it is turned into an invalid report
        carrying a single ``parse_error`` issue.

        :param ncdump_output: Output from ncdump -h command
        :param filename: Optional filename for reporting
        :return: Validation report
        """
        try:
            header = NetCDFHeaderParser.parse_from_ncdump(ncdump_output)
        except Exception as e:
            return self._parse_failure_report(e, filename)

        # An explicit, non-empty filename wins over the one found by the parser.
        if filename:
            header.filename = filename

        return self.validator.validate(header.global_attributes, header.filename)

    def validate_from_attributes_dict(
        self, attributes: Dict[str, Any], filename: Optional[str] = None
    ) -> ValidationReport:
        """
        Validate global attributes given as a plain dictionary.

        :param attributes: Dictionary of global attributes
        :param filename: Optional filename for reporting
        :return: Validation report
        """
        from .models.netcdf_header import NetCDFGlobalAttributes

        return self.validator.validate(NetCDFGlobalAttributes(attributes=attributes), filename)

    def get_required_attributes(self) -> List[str]:
        """
        List the names of the attributes flagged as required.

        :return: List of required attribute names
        """
        names = []
        for spec in self.attribute_specs:
            if spec.is_required:
                names.append(self._spec_name(spec))
        return names

    def get_optional_attributes(self) -> List[str]:
        """
        List the names of the attributes that are not flagged as required.

        :return: List of optional attribute names
        """
        names = []
        for spec in self.attribute_specs:
            if not spec.is_required:
                names.append(self._spec_name(spec))
        return names

    def get_attribute_info(self, attribute_name: str) -> Optional[Dict[str, Any]]:
        """
        Describe one attribute.

        :param attribute_name: Name of the attribute
        :return: Attribute information dictionary or None if not found
        """
        # The first specification carrying that name wins.
        match = next(
            (spec for spec in self.attribute_specs if self._spec_name(spec) == attribute_name),
            None,
        )
        if match is None:
            return None

        return {
            "name": attribute_name,
            "source_collection": match.source_collection,
            "value_type": match.value_type,
            "required": match.is_required,
            "default_value": match.default_value,
            "specific_key": match.specific_key,
        }

    def list_attributes(self) -> List[str]:
        """
        List the names of every attribute of the specification.

        :return: List of all attribute names
        """
        return list(map(self._spec_name, self.attribute_specs))

    def reload_config(self) -> None:
        """
        Read the attribute specifications again and rebuild the inner validator.
        """
        self.attribute_specs = self._load_from_database()
        self.validator = GlobalAttributeValidator(self.attribute_specs, self.project_id)
169
+
170
+
171
class GAValidatorFactory:
    """
    Named constructors for the GA validators of the supported projects.
    """

    @staticmethod
    def create_cmip6_validator() -> GAValidator:
        """
        Build a validator bound to the ``cmip6`` project.

        :return: GAValidator instance for CMIP6
        """
        cmip6_validator = GAValidator("cmip6")
        return cmip6_validator

    @staticmethod
    def create_cmip7_validator() -> GAValidator:
        """
        Build a validator bound to the ``cmip7`` project.

        :return: GAValidator instance for CMIP7
        """
        cmip7_validator = GAValidator("cmip7")
        return cmip7_validator
193
+
194
+
195
def validate_netcdf_attributes(
    ncdump_output: str, project_id: str = "cmip6", filename: Optional[str] = None
) -> ValidationReport:
    """
    One-shot validation of NetCDF global attributes.

    A fresh ``GAValidator`` is created on every call, so the attribute
    specifications of the project are read each time.

    :param ncdump_output: Output from ncdump -h command
    :param project_id: Project identifier for validation
    :param filename: Optional filename for reporting
    :return: Validation report
    """
    return GAValidator(project_id).validate_from_ncdump(ncdump_output, filename)
210
+
211
+
212
def create_validation_summary(report: ValidationReport) -> str:
    """
    Render a validation report as plain text.

    The text holds a header, the counters of the report, the issues grouped
    by severity (errors, then warnings, then info messages) and the lists of
    missing and extra attributes.

    :param report: Validation report to summarize
    :return: Formatted summary string
    """
    rule = "=" * 60
    out = [rule, "NetCDF Global Attributes Validation Report", rule]

    if report.filename:
        out.append(f"File: {report.filename}")
    out.extend(
        [
            f"Project: {report.project_id}",
            f"Status: {'VALID' if report.is_valid else 'INVALID'}",
            "",
        ]
    )

    # Counters.
    out.extend(
        [
            "Summary:",
            f" • Errors: {report.error_count}",
            f" • Warnings: {report.warning_count}",
            f" • Info messages: {report.info_count}",
            f" • Validated attributes: {len(report.validated_attributes)}",
            f" • Missing required attributes: {len(report.missing_attributes)}",
            f" • Extra attributes: {len(report.extra_attributes)}",
            "",
        ]
    )

    # Issues, most severe group first.
    if report.issues:
        out.extend(["Issues:", ""])

        for severity in (ValidationSeverity.ERROR, ValidationSeverity.WARNING, ValidationSeverity.INFO):
            group = report.get_issues_by_severity(severity)
            if not group:
                continue

            out.append(f"{severity.value.upper()}S:")
            last_position = len(group) - 1
            for position, issue in enumerate(group):
                out.append(f" • {issue.attribute_name}: {issue.message}")
                if issue.expected_value is not None:
                    out.append(f" Expected: {issue.expected_value}")
                if issue.actual_value is not None:
                    out.append(f" Actual: {issue.actual_value}")

                # NOTE(review): the reviewed listing carried no indentation, so the
                # nesting of the two blank-line appends below is a reconstruction;
                # confirm against the repository.
                # A separator follows every issue of the group but the last one.
                if position != last_position:
                    out.append(" " + "-" * 50)
                    out.append("")
            out.append("")

    # Required attributes that were not found.
    if report.missing_attributes:
        out.append("Missing Required Attributes:")
        out.extend(f" • {attr}" for attr in report.missing_attributes)
        out.append("")

    # Attributes that the specification does not know.
    if report.extra_attributes:
        out.append("Extra Attributes (not in specification):")
        out.extend(f" • {attr}" for attr in report.extra_attributes)
        out.append("")

    out.append(rule)
    return "\n".join(out)
@@ -0,0 +1,341 @@
1
+ import json
2
+ from dataclasses import dataclass
3
+ from itertools import combinations, product
4
+ from pathlib import Path
5
+ from typing import Sequence
6
+
7
+ from jinja2 import Environment, FileSystemLoader
8
+ from sqlmodel import Session
9
+
10
+ from esgvoc.api import projects, search
11
+ from esgvoc.api.project_specs import CatalogProperty, DrsType
12
+ from esgvoc.core.constants import COMPOSITE_REQUIRED_KEY, DRS_SPECS_JSON_KEY, PATTERN_JSON_KEY
13
+ from esgvoc.core.db.models.project import PCollection, PTerm, TermKind
14
+ from esgvoc.core.db.models.universe import UTerm
15
+ from esgvoc.core.exceptions import EsgvocException, EsgvocNotFoundError, EsgvocNotImplementedError, EsgvocValueError
16
+
17
# Separator placed between the project id and the attribute name of a schema field.
KEY_SEPARATOR = ':'

# The Jinja template is looked up in a directory that sits next to this module.
TEMPLATE_DIR_NAME = 'templates'
TEMPLATE_FILE_NAME = 'template.jinja'
TEMPLATE_DIR_PATH = Path(__file__).parent / TEMPLATE_DIR_NAME

# Indentation width used when a schema is serialized for display.
JSON_INDENTATION = 2
22
+
23
+
24
+ @dataclass
25
+ class _CatalogProperty:
26
+ field_name: str
27
+ field_value: dict
28
+ is_required: bool
29
+
30
+
31
+ def _process_col_plain_terms(collection: PCollection, source_collection_key: str) -> tuple[str, list[str]]:
32
+ property_values: set[str] = set()
33
+ for term in collection.terms:
34
+ property_key, property_value = _process_plain_term(term, source_collection_key)
35
+ property_values.add(property_value)
36
+ # Filter out None values before sorting to avoid TypeError
37
+ filtered_values = [v for v in property_values if v is not None]
38
+ return property_key, sorted(filtered_values) # type: ignore
39
+
40
+
41
+ def _process_plain_term(term: PTerm, source_collection_key: str) -> tuple[str, str]:
42
+ if source_collection_key in term.specs:
43
+ property_value = term.specs[source_collection_key]
44
+ else:
45
+ raise EsgvocNotFoundError(f'missing key {source_collection_key} for term {term.id} in ' +
46
+ f'collection {term.collection.id}')
47
+ return 'enum', property_value
48
+
49
+
50
+ def _process_col_composite_terms(collection: PCollection, universe_session: Session,
51
+ project_session: Session) -> tuple[str, list[str | dict], bool]:
52
+ result: list[str | dict] = list()
53
+ property_key = ""
54
+ has_pattern = False
55
+ for term in collection.terms:
56
+ property_key, property_value, _has_pattern = _process_composite_term(term, universe_session,
57
+ project_session)
58
+ if isinstance(property_value, list):
59
+ result.extend(property_value)
60
+ else:
61
+ result.append(property_value)
62
+ has_pattern |= _has_pattern
63
+ return property_key, result, has_pattern
64
+
65
+
66
+ def _inner_process_composite_term(resolved_term: UTerm | PTerm,
67
+ universe_session: Session,
68
+ project_session: Session) -> tuple[str | list, bool]:
69
+ is_pattern = False
70
+ match resolved_term.kind:
71
+ case TermKind.PLAIN:
72
+ result = resolved_term.specs[DRS_SPECS_JSON_KEY]
73
+ case TermKind.PATTERN:
74
+ result = resolved_term.specs[PATTERN_JSON_KEY].replace('^', '').replace('$', '')
75
+ is_pattern = True
76
+ case TermKind.COMPOSITE:
77
+ _, result, is_pattern = _process_composite_term(resolved_term, universe_session,
78
+ project_session)
79
+ case _:
80
+ msg = f"unsupported term kind '{resolved_term.kind}'"
81
+ raise EsgvocNotImplementedError(msg)
82
+ return result, is_pattern
83
+
84
+
85
def _accumulate_resolved_part(resolved_part: list,
                              resolved_term: UTerm | PTerm,
                              universe_session: Session,
                              project_session: Session) -> bool:
    """
    Translate a resolved term and pour its value(s) into ``resolved_part``.

    :param resolved_part: The list that receives the value(s); it is modified in place.
    :param resolved_term: The term to translate.
    :param universe_session: The session forwarded to the translation.
    :param project_session: The session forwarded to the translation.
    :returns: Whether the translated term involves a pattern.
    """
    values, involves_pattern = _inner_process_composite_term(resolved_term, universe_session,
                                                             project_session)
    # A single value is wrapped so that both shapes are added the same way.
    resolved_part.extend(values if isinstance(values, list) else [values])
    return involves_pattern
96
+
97
+
98
+ def _generate_combinations(items_parts: list[list], required_parts: list[bool]) -> list[list]:
99
+ number_of_parts = len(items_parts)
100
+ required_indexes = {index for index, required in enumerate(required_parts) if required}
101
+ result = list()
102
+ # Generate all the combination of item lists.
103
+ for r in range(1, number_of_parts + 1): # Some optional list may or may not be included.
104
+ # According to the doc, combination respect the list order.
105
+ for index_subset in combinations(range(number_of_parts), r):
106
+ # Only keep combinations with the required item lists.
107
+ if required_indexes.issubset(index_subset):
108
+ result.append([items_parts[index] for index in index_subset])
109
+ return result
110
+
111
+
112
def _process_composite_term(term: UTerm | PTerm, universe_session: Session,
                            project_session: Session) -> tuple[str, list[str | dict], bool]:
    """
    Translate a composite term into the list of values it can take.

    Each part of the term is resolved into its possible values. The values of the
    parts are then joined with the separator of the term, for every selection of
    parts that keeps the required ones.

    :param term: The composite term to translate.
    :param universe_session: The session used to resolve the parts.
    :param project_session: The session used to resolve the parts.
    :returns: The schema keyword (``'anyOf'`` when a pattern is involved, ``'enum'``
              otherwise), the values (``{'pattern': ...}`` dictionaries when a pattern
              is involved, plain strings otherwise), and whether a pattern is involved.
    """
    items_parts: list[list[str]] = list()
    required_parts: list[bool] = list()
    separator, parts = projects._get_composite_term_separator_parts(term)
    has_pattern = False
    for part in parts:
        resolved_term = projects._resolve_composite_term_part(part, universe_session, project_session)
        resolved_terms = resolved_term if isinstance(resolved_term, Sequence) else [resolved_term]
        resolved_part = list()
        for r_term in resolved_terms:
            # Accumulate with |= for every part: a plain part that comes after
            # a pattern part must not reset the flag.
            has_pattern |= _accumulate_resolved_part(resolved_part, r_term, universe_session,
                                                     project_session)
        items_parts.append(resolved_part)
        required_parts.append(part[COMPOSITE_REQUIRED_KEY])

    property_values: list[str | dict] = list()
    for selection in _generate_combinations(items_parts, required_parts):
        for product_result in product(*selection):
            # Pattern terms are meant to be validated individually, so each of their
            # regexes is anchored (starts with ^, ends with $). The anchors of the parts
            # are removed when the parts are resolved; a single pair is put back here,
            # around the whole concatenation.
            joined = separator.join(product_result)
            property_values.append({'pattern': f'^{joined}$'} if has_pattern else joined)
    property_key = 'anyOf' if has_pattern else 'enum'
    return property_key, property_values, has_pattern
145
+
146
+
147
+ def _process_col_pattern_terms(collection: PCollection) -> tuple[str, str | list[dict]]:
148
+ if len(collection.terms) == 1:
149
+ term = collection.terms[0]
150
+ property_key, property_value = _process_pattern_term(term)
151
+ else:
152
+ property_key = 'anyOf'
153
+ property_value = list()
154
+ for term in collection.terms:
155
+ pkey, pvalue = _process_pattern_term(term)
156
+ property_value.append({pkey: pvalue})
157
+ return property_key, property_value
158
+
159
+
160
def _process_pattern_term(term: PTerm) -> tuple[str, str]:
    """
    Translate one pattern term.

    :param term: The term to translate.
    :returns: The ``'pattern'`` keyword and the regex stored in the specs of the term.
    """
    regex = term.specs[PATTERN_JSON_KEY]
    return 'pattern', regex
162
+
163
+
164
+ class CatalogPropertiesJsonTranslator:
165
+ def __init__(self, project_id: str) -> None:
166
+ self.project_id = project_id
167
+ # Project session can't be None here.
168
+ self.universe_session: Session = search.get_universe_session()
169
+ self.project_session: Session = projects._get_project_session_with_exception(project_id)
170
+ self.collections: dict[str, PCollection] = dict()
171
+ for collection in projects._get_all_collections_in_project(self.project_session):
172
+ self.collections[collection.id] = collection
173
+
174
+ def __exit__(self, exception_type, exception_value, exception_traceback):
175
+ self.project_session.close()
176
+ self.universe_session.close()
177
+ if exception_type is not None:
178
+ raise exception_value
179
+ return True
180
+
181
+ def _translate_property_value(self, catalog_property: CatalogProperty) \
182
+ -> tuple[str | None, str | list[str] | list[str | dict] | None]:
183
+ property_key: str | None
184
+ property_value: str | list[str] | list[str | dict] | None
185
+
186
+ # Properties unrelated to collections of project.
187
+ if catalog_property.source_collection is None:
188
+ property_key = None
189
+ property_value = None
190
+ elif catalog_property.source_collection not in self.collections:
191
+ raise EsgvocNotFoundError(f"collection '{catalog_property.source_collection}' is not found")
192
+ else:
193
+ if catalog_property.source_collection_key is None:
194
+ source_collection_key = DRS_SPECS_JSON_KEY
195
+ else:
196
+ source_collection_key = catalog_property.source_collection_key
197
+
198
+ if catalog_property.source_collection_term is None:
199
+ collection = self.collections[catalog_property.source_collection]
200
+ match collection.term_kind:
201
+ case TermKind.PLAIN:
202
+ property_key, property_value = _process_col_plain_terms(
203
+ collection=collection,
204
+ source_collection_key=source_collection_key)
205
+ case TermKind.COMPOSITE:
206
+ property_key, property_value, _ = _process_col_composite_terms(
207
+ collection=collection,
208
+ universe_session=self.universe_session,
209
+ project_session=self.project_session)
210
+ case TermKind.PATTERN:
211
+ property_key, property_value = _process_col_pattern_terms(collection)
212
+ case _:
213
+ msg = f"unsupported term kind '{collection.term_kind}'"
214
+ raise EsgvocNotImplementedError(msg)
215
+ else:
216
+ pterm_found = projects._get_term_in_collection(
217
+ session=self.project_session,
218
+ collection_id=catalog_property.source_collection,
219
+ term_id=catalog_property.source_collection_term)
220
+ if pterm_found is None:
221
+ raise EsgvocValueError(f"term '{catalog_property.source_collection_term}' is not " +
222
+ f"found in collection '{catalog_property.source_collection}'")
223
+ match pterm_found.kind:
224
+ case TermKind.PLAIN:
225
+ property_key, property_value = _process_plain_term(
226
+ term=pterm_found,
227
+ source_collection_key=source_collection_key)
228
+ case TermKind.COMPOSITE:
229
+ property_key, property_value, _ = _process_composite_term(
230
+ term=pterm_found,
231
+ universe_session=self.universe_session,
232
+ project_session=self.project_session)
233
+ case TermKind.PATTERN:
234
+ property_key, property_value = _process_pattern_term(term=pterm_found)
235
+ case _:
236
+ msg = f"unsupported term kind '{pterm_found.kind}'"
237
+ raise EsgvocNotImplementedError(msg)
238
+ return property_key, property_value
239
+
240
+ def translate_property(self, catalog_property: CatalogProperty) -> _CatalogProperty:
241
+ property_key, property_value = self._translate_property_value(catalog_property)
242
+ field_value = dict()
243
+ if 'array' in catalog_property.catalog_field_value_type:
244
+ field_value['type'] = 'array'
245
+ root_property = dict()
246
+ field_value['items'] = root_property
247
+ root_property['type'] = catalog_property.catalog_field_value_type.split('_')[0]
248
+ root_property['minItems'] = 1
249
+ else:
250
+ field_value['type'] = catalog_property.catalog_field_value_type
251
+ root_property = field_value
252
+
253
+ if (property_key is not None) and (property_value is not None):
254
+ root_property[property_key] = property_value
255
+
256
+ if catalog_property.catalog_field_name is None:
257
+ attribute_name = catalog_property.source_collection
258
+ else:
259
+ attribute_name = catalog_property.catalog_field_name
260
+ field_name = CatalogPropertiesJsonTranslator._translate_field_name(self.project_id,
261
+ attribute_name)
262
+ return _CatalogProperty(field_name=field_name,
263
+ field_value=field_value,
264
+ is_required=catalog_property.is_required)
265
+
266
+ @staticmethod
267
+ def _translate_field_name(project_id: str, attribute_name) -> str:
268
+ return f'{project_id}{KEY_SEPARATOR}{attribute_name}'
269
+
270
+
271
+ def _catalog_properties_json_processor(property_translator: CatalogPropertiesJsonTranslator,
272
+ properties: list[CatalogProperty]) -> list[_CatalogProperty]:
273
+ result: list[_CatalogProperty] = list()
274
+ for dataset_property_spec in properties:
275
+ catalog_property = property_translator.translate_property(dataset_property_spec)
276
+ result.append(catalog_property)
277
+ return result
278
+
279
+
280
def generate_json_schema(project_id: str) -> dict:
    """
    Generate json schema for the given project.

    The catalog properties of the project are translated into schema fields,
    rendered through the Jinja template, and the rendered text is parsed back
    to check that it is valid json.

    :param project_id: The id of the given project.
    :type project_id: str
    :returns: The root node of a json schema.
    :rtype: dict
    :raises EsgvocValueError: On wrong information in catalog_specs.
    :raises EsgvocNotFoundError: On missing information in catalog_specs.
    :raises EsgvocNotImplementedError: On unexpected operations resulted in wrong information in catalog_specs.
    :raises EsgvocException: On json compliance error.
    """
    project_specs = projects.get_project(project_id)
    if project_specs is None:
        raise EsgvocNotFoundError(f"unknown project '{project_id}'")
    catalog_specs = project_specs.catalog_specs
    if catalog_specs is None:
        raise EsgvocNotFoundError(f"catalog properties for the project '{project_id}' " +
                                  "are missing")

    env = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH))  # noqa: S701
    template = env.get_template(TEMPLATE_FILE_NAME)
    # One template variable per catalog extension, named after the extension.
    extension_specs = dict()
    for catalog_extension in catalog_specs.catalog_properties.extensions:
        catalog_extension_name = catalog_extension.name.replace('-', '_')
        extension_specs[f'{catalog_extension_name}_extension_version'] = catalog_extension.version
    drs_dataset_id_regex = project_specs.drs_specs[DrsType.DATASET_ID].regex

    property_translator = CatalogPropertiesJsonTranslator(project_id)
    try:
        catalog_dataset_properties = \
            _catalog_properties_json_processor(property_translator,
                                               catalog_specs.dataset_properties)
        catalog_file_properties = \
            _catalog_properties_json_processor(property_translator,
                                               catalog_specs.file_properties)
    finally:
        # Dropping the reference to the translator does not close its sessions:
        # close them explicitly, whether the translation succeeded or not.
        try:
            property_translator.project_session.close()
        finally:
            property_translator.universe_session.close()

    json_raw_str = template.render(project_id=project_id,
                                   catalog_version=catalog_specs.version,
                                   drs_dataset_id_regex=drs_dataset_id_regex,
                                   catalog_dataset_properties=catalog_dataset_properties,
                                   catalog_file_properties=catalog_file_properties,
                                   **extension_specs)
    # Json compliance checking.
    try:
        return json.loads(json_raw_str)
    except Exception as e:
        raise EsgvocException(f'JSON error: {e}. Dump raw:\n{json_raw_str}') from e
330
+
331
+
332
def pretty_print_json_node(obj: dict) -> str:
    """
    Render a dictionary as indented json text.

    :param obj: The dictionary.
    :type obj: dict
    :returns: The json text of the dictionary, indented by ``JSON_INDENTATION`` spaces.
    :rtype: str
    """
    json_text = json.dumps(obj, indent=JSON_INDENTATION)
    return json_text