esgvoc 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (78) hide show
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +30 -30
  3. esgvoc/api/_utils.py +28 -14
  4. esgvoc/api/data_descriptors/__init__.py +19 -10
  5. esgvoc/api/data_descriptors/activity.py +8 -45
  6. esgvoc/api/data_descriptors/area_label.py +6 -0
  7. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  8. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  9. esgvoc/api/data_descriptors/consortium.py +16 -56
  10. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  11. esgvoc/api/data_descriptors/date.py +3 -46
  12. esgvoc/api/data_descriptors/directory_date.py +5 -0
  13. esgvoc/api/data_descriptors/experiment.py +19 -54
  14. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  15. esgvoc/api/data_descriptors/frequency.py +6 -43
  16. esgvoc/api/data_descriptors/grid_label.py +6 -44
  17. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  18. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  19. esgvoc/api/data_descriptors/institution.py +11 -54
  20. esgvoc/api/data_descriptors/license.py +4 -44
  21. esgvoc/api/data_descriptors/mip_era.py +6 -44
  22. esgvoc/api/data_descriptors/model_component.py +7 -45
  23. esgvoc/api/data_descriptors/organisation.py +3 -40
  24. esgvoc/api/data_descriptors/physic_index.py +3 -45
  25. esgvoc/api/data_descriptors/product.py +4 -43
  26. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  27. esgvoc/api/data_descriptors/realm.py +4 -42
  28. esgvoc/api/data_descriptors/resolution.py +6 -44
  29. esgvoc/api/data_descriptors/source.py +18 -53
  30. esgvoc/api/data_descriptors/source_type.py +3 -41
  31. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  32. esgvoc/api/data_descriptors/table.py +6 -48
  33. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  34. esgvoc/api/data_descriptors/time_range.py +3 -27
  35. esgvoc/api/data_descriptors/variable.py +13 -71
  36. esgvoc/api/data_descriptors/variant_label.py +3 -47
  37. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  38. esgvoc/api/project_specs.py +82 -0
  39. esgvoc/api/projects.py +284 -238
  40. esgvoc/api/report.py +89 -52
  41. esgvoc/api/search.py +31 -11
  42. esgvoc/api/universe.py +57 -48
  43. esgvoc/apps/__init__.py +6 -0
  44. esgvoc/apps/drs/__init__.py +0 -16
  45. esgvoc/apps/drs/constants.py +2 -0
  46. esgvoc/apps/drs/generator.py +429 -0
  47. esgvoc/apps/drs/report.py +492 -0
  48. esgvoc/apps/drs/validator.py +330 -0
  49. esgvoc/cli/drs.py +248 -0
  50. esgvoc/cli/get.py +26 -25
  51. esgvoc/cli/install.py +11 -8
  52. esgvoc/cli/main.py +4 -5
  53. esgvoc/cli/status.py +14 -2
  54. esgvoc/cli/valid.py +41 -45
  55. esgvoc/core/db/models/mixins.py +7 -0
  56. esgvoc/core/db/models/project.py +3 -8
  57. esgvoc/core/db/models/universe.py +3 -3
  58. esgvoc/core/db/project_ingestion.py +4 -1
  59. esgvoc/core/db/universe_ingestion.py +8 -7
  60. esgvoc/core/logging_handler.py +1 -1
  61. esgvoc/core/repo_fetcher.py +4 -3
  62. esgvoc/core/service/__init__.py +37 -5
  63. esgvoc/core/service/configuration/config_manager.py +188 -0
  64. esgvoc/core/service/configuration/setting.py +88 -0
  65. esgvoc/core/service/state.py +66 -42
  66. esgvoc-0.3.0.dist-info/METADATA +89 -0
  67. esgvoc-0.3.0.dist-info/RECORD +78 -0
  68. esgvoc-0.3.0.dist-info/licenses/LICENSE.txt +519 -0
  69. esgvoc/apps/drs/models.py +0 -43
  70. esgvoc/apps/drs/parser.py +0 -27
  71. esgvoc/cli/config.py +0 -79
  72. esgvoc/core/service/settings.py +0 -64
  73. esgvoc/core/service/settings.toml +0 -12
  74. esgvoc/core/service/settings_default.toml +0 -20
  75. esgvoc-0.1.2.dist-info/METADATA +0 -54
  76. esgvoc-0.1.2.dist-info/RECORD +0 -66
  77. {esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/WHEEL +0 -0
  78. {esgvoc-0.1.2.dist-info → esgvoc-0.3.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,330 @@
1
+ from typing import cast
2
+
3
+ import esgvoc.api.projects as projects
4
+ import esgvoc.apps.drs.constants as constants
5
+ from esgvoc.api import APIException
6
+ from esgvoc.api.project_specs import (DrsCollection, DrsConstant, DrsPart,
7
+ DrsPartKind, DrsSpecification, DrsType,
8
+ ProjectSpecs)
9
+ from esgvoc.apps.drs.report import (BlankTerm, ComplianceIssue, DrsIssue,
10
+ DrsValidationReport, ExtraChar,
11
+ ExtraSeparator, ExtraTerm,
12
+ FileNameExtensionIssue, InvalidTerm,
13
+ MissingTerm, ParsingIssue, Space,
14
+ Unparsable, ValidationError,
15
+ ValidationWarning)
16
+
17
+
18
class DrsApplication:
    """
    Base class shared by the DRS applications.

    It looks up the specifications of a project and keeps one DRS specification
    per supported DRS type (directory, file name and dataset id).
    """

    def __init__(self, project_id: str, pedantic: bool = False) -> None:
        """
        Load the DRS specifications of the given project.

        :param project_id: The id of the project.
        :type project_id: str
        :param pedantic: Turn warnings into errors. Default False.
        :type pedantic: bool
        :raises APIException: If the project cannot be found.
        :raises RuntimeError: If a DRS specification has an unsupported type.
        """
        #: The project id.
        self.project_id: str = project_id
        #: Same as the option of GCC: turn warnings into errors. Default False.
        self.pedantic: bool = pedantic
        found_specs: ProjectSpecs|None = projects.find_project(project_id)
        if not found_specs:
            raise APIException(f'unable to find project {project_id}')
        for specs in found_specs.drs_specs:
            specs_type = specs.type
            if specs_type == DrsType.DIRECTORY:
                #: The DRS directory specs of the project.
                self.directory_specs: DrsSpecification = specs
            elif specs_type == DrsType.FILE_NAME:
                #: The DRS file name specs of the project.
                self.file_name_specs: DrsSpecification = specs
            elif specs_type == DrsType.DATASET_ID:
                #: The DRS dataset id specs of the project.
                self.dataset_id_specs: DrsSpecification = specs
            else:
                raise RuntimeError(f'unsupported DRS specs type {specs.type}')

    def _get_full_file_name_extension(self) -> str:
        """
        Build the full file name extension of the project: the extension separator
        followed by the extension, both read from the properties of the DRS file
        name specs.

        :returns: The full file name extension.
        :rtype: str
        :raises RuntimeError: If the DRS file name specs have no properties.
        """
        properties = self.file_name_specs.properties
        if not properties:
            raise RuntimeError('missing properties in the DRS file name specifications of the ' +
                               f'project {self.project_id}')
        separator = properties[constants.FILE_NAME_EXTENSION_SEPARATOR_KEY]
        extension = properties[constants.FILE_NAME_EXTENSION_KEY]
        return separator + extension
61
+
62
+
63
+ class DrsValidator(DrsApplication):
64
+ """
65
+ Valid a DRS directory, dataset id and file name expression against a project.
66
+ """
67
+
68
+ def validate_directory(self, drs_expression: str,
69
+ prefix: str|None = None) -> DrsValidationReport:
70
+ """
71
+ Validate a DRS directory expression.
72
+
73
+ :param drs_expression: A DRS directory expression.
74
+ :type drs_expression: str
75
+ :param prefix: A directory prefix to be removed from the directory expression.
76
+ :type prefix: str|None
77
+ :returns: A validation report.
78
+ :rtype: DrsValidationReport
79
+ """
80
+ if prefix:
81
+ # Remove prefix if present. Always returns a copy.
82
+ drs_expression = drs_expression.removeprefix(prefix)
83
+ return self._validate(drs_expression, self.directory_specs)
84
+
85
+ def validate_dataset_id(self, drs_expression: str) -> DrsValidationReport:
86
+ """
87
+ Validate a DRS dataset id expression.
88
+
89
+ :param drs_expression: A DRS dataset id expression.
90
+ :type drs_expression: str
91
+ :returns: A validation report.
92
+ :rtype: DrsValidationReport
93
+ """
94
+ return self._validate(drs_expression, self.dataset_id_specs)
95
+
96
+ def validate_file_name(self, drs_expression: str) -> DrsValidationReport:
97
+ """
98
+ Validate a file name expression.
99
+
100
+ :param drs_expression: A DRS file name expression.
101
+ :type drs_expression: str
102
+ :returns: A validation report.
103
+ :rtype: DrsValidationReport
104
+ """
105
+ full_extension = self._get_full_file_name_extension()
106
+ if drs_expression.endswith(full_extension):
107
+ drs_expression = drs_expression.replace(full_extension, '')
108
+ result = self._validate(drs_expression, self.file_name_specs)
109
+ else:
110
+ issue = FileNameExtensionIssue(expected_extension=full_extension)
111
+ result = self._create_report(self.file_name_specs.type, drs_expression,
112
+ [issue], [])
113
+ return result
114
+
115
+ def validate(self, drs_expression: str, drs_type: DrsType|str) -> DrsValidationReport:
116
+ """
117
+ Validate a DRS expression.
118
+
119
+ :param drs_expression: A DRS expression.
120
+ :type drs_expression: str
121
+ :param drs_type: The type of the given DRS expression (directory, file_name or dataset_id)
122
+ :type drs_type: DrsType|str
123
+ :returns: A validation report.
124
+ :rtype: DrsValidationReport
125
+ """
126
+ match drs_type:
127
+ case DrsType.DIRECTORY:
128
+ return self.validate_directory(drs_expression=drs_expression)
129
+ case DrsType.FILE_NAME:
130
+ return self.validate_file_name(drs_expression=drs_expression)
131
+ case DrsType.DATASET_ID:
132
+ return self.validate_dataset_id(drs_expression=drs_expression)
133
+ case _:
134
+ raise RuntimeError(f'unsupported drs type {drs_type}')
135
+
136
+ def _parse(self,
137
+ drs_expression: str,
138
+ separator: str,
139
+ drs_type: DrsType) -> tuple[list[str]|None, # terms
140
+ list[DrsIssue], # Errors
141
+ list[DrsIssue]]: # Warnings
142
+ errors: list[DrsIssue] = list()
143
+ warnings: list[DrsIssue] = list()
144
+ cursor_offset = 0
145
+ # Spaces at the beginning/end of expression:
146
+ start_with_space = drs_expression[0].isspace()
147
+ end_with_space = drs_expression[-1].isspace()
148
+ if start_with_space or end_with_space:
149
+ issue: ParsingIssue = Space()
150
+ if self.pedantic:
151
+ errors.append(issue)
152
+ else:
153
+ warnings.append(issue)
154
+ if start_with_space:
155
+ previous_len = len(drs_expression)
156
+ drs_expression = drs_expression.lstrip()
157
+ cursor_offset = previous_len - len(drs_expression)
158
+ if end_with_space:
159
+ drs_expression = drs_expression.rstrip()
160
+ terms = drs_expression.split(separator)
161
+ if len(terms) < 2:
162
+ errors.append(Unparsable(expected_drs_type=drs_type))
163
+ return None, errors, warnings # Early exit
164
+ max_term_index = len(terms)
165
+ cursor_position = initial_cursor_position = len(drs_expression) + 1
166
+ has_white_term = False
167
+ for index in range(max_term_index-1, -1, -1):
168
+ term = terms[index]
169
+ if (is_white_term := term.isspace()) or (not term):
170
+ has_white_term = has_white_term or is_white_term
171
+ cursor_position -= len(term) + 1
172
+ del terms[index]
173
+ continue
174
+ else:
175
+ break
176
+ if cursor_position != initial_cursor_position:
177
+ max_term_index = len(terms)
178
+ column = cursor_position+cursor_offset
179
+ if (drs_type == DrsType.DIRECTORY) and (not has_white_term):
180
+ issue = ExtraSeparator(column=column)
181
+ warnings.append(issue)
182
+ else:
183
+ issue = ExtraChar(column=column)
184
+ errors.append(issue)
185
+ for index in range(max_term_index-1, -1, -1):
186
+ term = terms[index]
187
+ len_term = len(term)
188
+ if not term:
189
+ column = cursor_position + cursor_offset
190
+ issue = ExtraSeparator(column=column)
191
+ if (drs_type != DrsType.DIRECTORY) or self.pedantic or (index == 0):
192
+ errors.append(issue)
193
+ else:
194
+ warnings.append(issue)
195
+ del terms[index]
196
+ if term.isspace():
197
+ column = cursor_position + cursor_offset - len_term
198
+ issue = BlankTerm(column=column)
199
+ errors.append(issue)
200
+ del terms[index]
201
+ cursor_position -= len_term + 1
202
+
203
+ # Mypy doesn't understand that ParsingIssues are DrsIssues...
204
+ sorted_errors = DrsValidator._sort_parser_issues(errors) # type: ignore
205
+ sorted_warnings = DrsValidator._sort_parser_issues(warnings) # type: ignore
206
+ return terms, sorted_errors, sorted_warnings # type: ignore
207
+
208
+ @staticmethod
209
+ def _sort_parser_issues(issues: list[ParsingIssue]) -> list[ParsingIssue]:
210
+ return sorted(issues, key=lambda issue: issue.column if issue.column else 0)
211
+
212
+ def _validate_term(self, term: str, part: DrsPart) -> bool:
213
+ match part.kind:
214
+ case DrsPartKind.COLLECTION:
215
+ casted_part: DrsCollection = cast(DrsCollection, part)
216
+ try:
217
+ matching_terms = projects.valid_term_in_collection(term,
218
+ self.project_id,
219
+ casted_part.collection_id)
220
+ except Exception as e:
221
+ msg = f'problem while validating term: {e}.Abort.'
222
+ raise APIException(msg) from e
223
+ if len(matching_terms) > 0:
224
+ return True
225
+ else:
226
+ return False
227
+ case DrsPartKind.CONSTANT:
228
+ part_casted: DrsConstant = cast(DrsConstant, part)
229
+ return part_casted.value != term
230
+ case _:
231
+ raise RuntimeError(f'unsupported DRS specs part type {part.kind}')
232
+
233
+ def _create_report(self,
234
+ type: DrsType,
235
+ drs_expression: str,
236
+ errors: list[DrsIssue],
237
+ warnings: list[DrsIssue]) -> DrsValidationReport:
238
+ return DrsValidationReport(project_id=self.project_id, type=type,
239
+ expression=drs_expression,
240
+ errors=cast(list[ValidationError], errors),
241
+ warnings=cast(list[ValidationWarning], warnings))
242
+
243
+ def _validate(self,
244
+ drs_expression: str,
245
+ specs: DrsSpecification) -> DrsValidationReport:
246
+ terms, errors, warnings = self._parse(drs_expression, specs.separator, specs.type)
247
+ if not terms:
248
+ return self._create_report(specs.type, drs_expression, errors, warnings) # Early exit.
249
+ term_index = 0
250
+ term_max_index = len(terms)
251
+ part_index = 0
252
+ part_max_index = len(specs.parts)
253
+ matching_code_mapping = dict()
254
+ while part_index < part_max_index:
255
+ term = terms[term_index]
256
+ part = specs.parts[part_index]
257
+ if self._validate_term(term, part):
258
+ term_index += 1
259
+ part_index += 1
260
+ matching_code_mapping[part.__str__()] = 0
261
+ elif part.kind == DrsPartKind.CONSTANT or \
262
+ cast(DrsCollection, part).is_required:
263
+ issue: ComplianceIssue = InvalidTerm(term=term,
264
+ term_position=term_index+1,
265
+ collection_id_or_constant_value=str(part))
266
+ errors.append(issue)
267
+ matching_code_mapping[part.__str__()] = 1
268
+ term_index += 1
269
+ part_index += 1
270
+ else: # The part is not required so try to match the term with the next part.
271
+ part_index += 1
272
+ matching_code_mapping[part.__str__()] = -1
273
+ if term_index == term_max_index:
274
+ break
275
+ # Cases:
276
+ # - All terms and collections have been processed.
277
+ # - Not enough term to process all collections.
278
+ # - Extra terms left whereas all collections have been processed:
279
+ # + The last collections are required => report extra terms.
280
+ # + The last collections are not required and these terms were not validated by them.
281
+ # => Should report error even if the collections are not required.
282
+ if part_index < part_max_index: # Missing terms.
283
+ for index in range(part_index, part_max_index):
284
+ part = specs.parts[index]
285
+ issue = MissingTerm(collection_id=str(part), collection_position=index+1)
286
+ if part.kind == DrsPartKind.CONSTANT or \
287
+ cast(DrsCollection, part).is_required:
288
+ errors.append(issue)
289
+ else:
290
+ warnings.append(issue)
291
+ elif term_index < term_max_index: # Extra terms.
292
+ part_index -= term_max_index - term_index
293
+ for index in range(term_index, term_max_index):
294
+ term = terms[index]
295
+ part = specs.parts[part_index]
296
+ if part.kind != DrsPartKind.CONSTANT and \
297
+ (not cast(DrsCollection, part).is_required) and \
298
+ matching_code_mapping[part.__str__()] < 0:
299
+ issue = ExtraTerm(term=term, term_position=index, collection_id=str(part))
300
+ else:
301
+ issue = ExtraTerm(term=term, term_position=index, collection_id=None)
302
+ errors.append(issue)
303
+ part_index += 1
304
+ return self._create_report(specs.type, drs_expression, errors, warnings)
305
+
306
+
307
if __name__ == "__main__":
    # Ad hoc manual check: validate sample dataset ids, then print the timing and
    # the issues of each report.
    import time

    project_id = 'cmip6plus'
    validator = DrsValidator(project_id)
    drs_expressions = [
        ".CMIP6Plus.CMIP.IPSL. .MIROC6.amip..r2i2p1f2.ACmon.od550aer. ..gn",
    ]
    for drs_expression in drs_expressions:
        started_ns = time.perf_counter_ns()
        report = validator.validate_dataset_id(drs_expression)
        elapsed_ns = time.perf_counter_ns() - started_ns
        print(f'elapsed time: {elapsed_ns/1000000} ms')
        if report.nb_errors <= 0:
            print('error(s): 0')
        else:
            print(f'error(s): {report.nb_errors}')
            for error in report.errors:
                print(error)
        if report.nb_warnings <= 0:
            print('warning(s): 0')
        else:
            print(f'warning(s): {report.nb_warnings}')
            for warning in report.warnings:
                print(warning)
esgvoc/cli/drs.py ADDED
@@ -0,0 +1,248 @@
1
+ import shlex
2
+ import sys
3
+ from typing import List, Optional
4
+
5
+ import typer
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ import esgvoc.api as ev
10
+ from esgvoc.apps.drs.generator import DrsGenerator
11
+ from esgvoc.apps.drs.report import DrsGenerationReport, DrsValidationReport
12
+ from esgvoc.apps.drs.validator import DrsValidator
13
+
14
# Typer application that the DRS commands of this module are registered on.
app = typer.Typer()
# Shared console used by the commands to print their results.
console = Console()


# Ids accepted as <project> in the command entries. They are fetched through the
# API when this module is imported (e.g. "cmip6", "cmip6plus").
projects = ev.get_all_projects()
# Keywords accepted as <drstype> in the command entries.
drs_types = ["filename", "directory", "dataset"]
23
+
24
def display(table):
    """
    Print a rich table on a dedicated recording console, 200 columns wide.

    :param table: The table to be displayed
    """
    Console(record=True, width=200).print(table)
32
+
33
+
34
+ @app.command()
35
+ def drsvalid(
36
+ drs_entries: Optional[List[str]] = typer.Argument(None, help="List of DRS validation inputs in the form <project> <drstype> <string>"),
37
+ file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS validation inputs, one per line in the form <project> <drstype> <string>"),
38
+ verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed validation results"),
39
+ output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the DRS entries validation"),
40
+ rm_prefix: Optional[str] = typer.Option(None,"-p","--prefix", help="Remove given prefix from all checked directory"),
41
+ pedantic: Optional[bool] = typer.Option(False,"-e","--enforce", help="Enable pedantic mode, enforcing strict compliance, mean that warnings are now errors.")
42
+
43
+
44
+
45
+ ) -> List[DrsValidationReport]:
46
+ """
47
+ Validates DRS strings for a specific project and type.
48
+
49
+ Args:
50
+ drs_entries (Optional[List[str]]): A list of DRS validation inputs in the form <project> <drstype> <string>.
51
+ file (Optional[typer.FileText]): File containing DRS validation inputs, one per line.
52
+ verbose (bool): If true, prints detailed validation results.
53
+
54
+ Usage Examples:
55
+ # Validate multiple filenames for CMIP6
56
+ drsvalid cmip6 filename file1.nc file2.nc file3.nc
57
+
58
+ # Validate using a file
59
+ drsvalid --file drs_input.txt
60
+ """
61
+ current_project = None
62
+ current_drs_type = None
63
+ reports = []
64
+
65
+ entries = drs_entries or []
66
+
67
+ if not sys.stdin.isatty(): # Check if input is being piped via stdin
68
+ entries.extend(el for line in sys.stdin for el in shlex.split(line))
69
+
70
+
71
+ if file:
72
+ entries.extend(el for line in file for el in line.strip().split(" "))
73
+
74
+ i = 0
75
+ while i < len(entries):
76
+ if entries[i] in [""," "]:
77
+ i+=1
78
+ continue
79
+
80
+ if entries[i] in projects:
81
+ current_project = entries[i]
82
+ i += 1
83
+ continue
84
+ if entries[i] in drs_types:
85
+ current_drs_type = entries[i]
86
+ i += 1
87
+ continue
88
+
89
+ if current_project is None:
90
+ raise typer.BadParameter(f"Invalid project: {entries[i]}")
91
+
92
+ if current_drs_type is None:
93
+ raise typer.BadParameter(f"Invalid drs_type: {entries[i]}")
94
+
95
+ string = entries[i]
96
+ i += 1
97
+ validator = DrsValidator(current_project, pedantic=pedantic)
98
+ report = None
99
+ match current_drs_type:
100
+ case "filename":
101
+ report = validator.validate_file_name(string)
102
+ case "directory":
103
+ if rm_prefix:
104
+ prefix = rm_prefix+"/" if rm_prefix[-1]!="/" else ""
105
+ else:
106
+ prefix=None
107
+ report = validator.validate_directory(string, prefix)
108
+ case "dataset":
109
+ report = validator.validate_dataset_id(string)
110
+ case _:
111
+ raise RuntimeError("drstype is not known")
112
+ reports.append(report)
113
+
114
+ if verbose:
115
+ table = Table(title="Validation result")
116
+ table.add_column("entry", style="cyan")
117
+ table.add_column("project & drs_type", style="cyan")
118
+ table.add_column("warnings", style="magenta")
119
+ table.add_column("errors", style="red")
120
+ table.add_column("valid")
121
+
122
+ for report in reports:
123
+ entry = str(report.expression)
124
+ proj_and_type = str(report.project_id) + " " + report.type + " "
125
+ warnings = "\n".join(["⚠️ " + str(warning) for warning in report.warnings])
126
+ errors = "\n".join(["⚠️ " + str(error) for error in report.errors])
127
+ valid = "✅ Valid" if report else "❌ Invalid"
128
+
129
+ table.add_row("-"*4,"-"*4,"-"*4,"-"*4,"-"*4)
130
+ table.add_row(entry,proj_and_type, warnings, errors, valid)
131
+
132
+ console.print(table)
133
+ elif output:
134
+ with open(output, "w") as f:
135
+ for report in reports:
136
+ f.write(str(report) + "\n")
137
+ console.print(f"DRS validation entries saved to [green]{output}[/green]")
138
+
139
+
140
+ else:
141
+ for report in reports:
142
+ console.print(str(report))
143
+
144
+ return reports
145
+
146
+
147
+ @app.command()
148
+ def drsgen(
149
+ drs_entries: Optional[List[str]] = typer.Argument(None, help="List of inputs to generate DRS in the form <project> <drstype> <bag_of_terms>"),
150
+ file: Optional[typer.FileText] = typer.Option(None, "--file", "-f", help="File containing DRS generation inputs, one per line in the form <project> <drstype> <bag_of_terms>"),
151
+ verbose: bool = typer.Option(False, "-v", "--verbose", help="Provide detailed generation results"),
152
+ output: Optional[str] = typer.Option(None, "-o", "--output", help="File to save the generated DRS entries"),
153
+ ) -> List[DrsGenerationReport]:
154
+ """
155
+ Generates DRS strings for a specific project and type based on input bag of terms.
156
+
157
+ Args:
158
+ drs_entries (Optional[List[str]]): A list of inputs in the form <project> <drstype> <bag_of_terms>.
159
+ file (Optional[typer.FileText]): File containing DRS generation inputs, one per line.
160
+ verbose (bool): If true, prints detailed generation results.
161
+ output (Optional[str]): File path to save the generated DRS entries.
162
+
163
+ Usage Examples:
164
+ # Generate multiple filenames for CMIP6
165
+ drsgen cmip6 filename var1=tas var2=pr
166
+
167
+ # Generate using a file
168
+ drsgen --file drs_input.txt
169
+ """
170
+ current_project = None
171
+ current_drs_type = None
172
+ generated_reports = []
173
+
174
+ entries = drs_entries or []
175
+
176
+ if not sys.stdin.isatty(): # Check if input is being piped via stdin
177
+ entries.extend(el for line in sys.stdin for el in shlex.split(line))
178
+
179
+ if file:
180
+ entries.extend(el for line in file for el in shlex.split(line))
181
+
182
+ i = 0
183
+ while i < len(entries):
184
+ if entries[i] in [""," "]:
185
+ i+=1
186
+ continue
187
+ if entries[i] in projects:
188
+ current_project = entries[i]
189
+ i += 1
190
+ continue
191
+ if entries[i] in drs_types:
192
+ current_drs_type = entries[i]
193
+ i += 1
194
+ continue
195
+
196
+ if current_project is None:
197
+ raise typer.BadParameter(f"Invalid project: {entries[i]}")
198
+
199
+ if current_drs_type is None:
200
+ raise typer.BadParameter(f"Invalid drs_type: {entries[i]}")
201
+
202
+ bag_of_terms = entries[i]
203
+ bag_of_terms = set(entries[i].split(" "))
204
+ i += 1
205
+
206
+ generator = DrsGenerator(current_project)
207
+ report = None
208
+ match current_drs_type:
209
+ case "filename":
210
+ report = generator.generate_file_name_from_bag_of_terms(bag_of_terms)
211
+ case "directory":
212
+ report = generator.generate_directory_from_bag_of_terms(bag_of_terms)
213
+ case "dataset":
214
+ report = generator.generate_dataset_id_from_bag_of_terms(bag_of_terms)
215
+ case _:
216
+ raise RuntimeError("drstype is not known")
217
+ generated_reports.append(report)
218
+
219
+ if verbose:
220
+ table = Table(title="Generation result")
221
+ table.add_column("deduced mapping entry", style="cyan")
222
+ table.add_column("warnings", style="magenta")
223
+ table.add_column("errors", style="red")
224
+ table.add_column("result", style="green", width=10)
225
+ for report in generated_reports:
226
+ entry = str(report.mapping_used)
227
+ warnings = "\n".join(["⚠️ " + str(warning) for warning in report.warnings])
228
+ errors = "\n".join([f"🔍 {error}" for error in report.errors])
229
+ result = report.generated_drs_expression
230
+ table.add_row(entry, warnings, errors, result)
231
+ table.add_row("----", "----", "----", "----")
232
+ if table.columns[3].width is not None and len(result) > table.columns[3].width:
233
+ table.columns[3].width = len(result)+1
234
+ console.print(table)
235
+
236
+ elif output:
237
+ with open(output, "w") as f:
238
+ for report in generated_reports:
239
+ f.write(str(report) + "\n")
240
+ console.print(f"Generated entries saved to [green]{output}[/green]")
241
+
242
+ else:
243
+ for report in generated_reports:
244
+ console.print(str(report))
245
+
246
+ return generated_reports
247
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
esgvoc/cli/get.py CHANGED
@@ -19,7 +19,7 @@ def validate_key_format(key: str):
19
19
  """
20
20
  Validate if the key matches the XXXX:YYYY:ZZZZ format.
21
21
  """
22
- if not re.match(r"^(\w*-?\w*)*:(\w*-?\w*)*:(\w*-?\w*)*$", key):
22
+ if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_.]*$", key):
23
23
  raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
24
24
  return key.split(":")
25
25
 
@@ -96,30 +96,31 @@ def display(data:Any):
96
96
  @app.command()
97
97
  def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format")):
98
98
  """
99
- Retrieve a specific value from the database system.
100
- This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
101
- and term in a structured format.
102
-
103
- Usage:
104
- `get <project>:<collection>:<term>`
105
-
106
- Arguments:
107
- <project> The name of the project to query. like `cmip6plus`
108
- <collection> The name of the collection in the specified database.
109
- <term> The name or term within the specified collection.
110
-
99
+ Retrieve a specific value from the database system.\n
100
+ This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
101
+ and term in a structured format.\n
102
+ \n
103
+
104
+ Usage:\n
105
+ `get <project>:<collection>:<term>`\n
106
+ \n
107
+ Arguments:\n
108
+ <project>\tThe project id to query. like `cmip6plus`\n
109
+ <collection>\tThe collection id in the specified database.\n
110
+ <term>\t\tThe term id within the specified collection.\n
111
+ \n
111
112
  Example:
112
- To retrieve the value from the "cmip6plus" project, under the "institution_id" column,
113
- in the term with the identifier "ipsl", you would use:
114
- `get cmip6plus:institution_id:ipsl`
115
- The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl`
116
- - to get list of available term from universe institution `:institution:`
117
-
118
- Notes:
119
- - Ensure data exist in your system before using this command (use status command to see whats available).
120
- - Use a colon (`:`) to separate the parts of the argument.
121
- - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended.
122
-
113
+ To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
114
+ `get cmip6plus:institution_id:ipsl`\n
115
+ The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl` \n
116
+ - to get list of available term from universe institution `:institution:` \n
117
+ \n
118
+ \n
119
+ Notes:\n
120
+ - Ensure data exist in your system before using this command (use `esgvoc status` command to see whats available).\n
121
+ - Use a colon (`:`) to separate the parts of the argument. \n
122
+ - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended. \n
123
+ \n
123
124
  """
124
125
  known_projects = get_all_projects()
125
126
 
@@ -133,7 +134,7 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
133
134
  if where == "" or where=="universe":
134
135
  res = handle_universe(what,who)
135
136
  elif where in known_projects:
136
- res = handle_project(where,what,who,{})
137
+ res = handle_project(where,what,who,None)
137
138
  else:
138
139
  res = handle_unknown(where,what,who)
139
140
 
esgvoc/cli/install.py CHANGED
@@ -1,14 +1,17 @@
1
1
  import typer
2
- from esgvoc.core.service import esg_voc
2
+ from esgvoc.core.service import current_state
3
3
 
4
4
  app = typer.Typer()
5
5
 
6
6
  @app.command()
7
7
  def install():
8
- """
9
- Command to clone and build necessary db with the latest available version
10
-
11
- """
12
- esg_voc.install()
13
-
14
-
8
+ """Initialize default config and apply settings"""
9
+ try:
10
+ typer.echo("Initialized default configuration")
11
+ current_state.synchronize_all()
12
+ except Exception as e:
13
+ typer.echo(f"Error during installation: {str(e)}", err=True)
14
+ raise typer.Exit(1)
15
+
16
+ if __name__ == "__main__":
17
+ app()