esgvoc 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of esgvoc might be problematic. Click here for more details.
- esgvoc/api/__init__.py +15 -4
- esgvoc/api/data_descriptors/__init__.py +3 -0
- esgvoc/api/data_descriptors/directory_date.py +48 -0
- esgvoc/api/project_specs.py +82 -0
- esgvoc/api/projects.py +160 -130
- esgvoc/api/report.py +78 -50
- esgvoc/api/search.py +28 -10
- esgvoc/api/universe.py +17 -18
- esgvoc/apps/__init__.py +7 -0
- esgvoc/apps/drs/__init__.py +0 -16
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +424 -0
- esgvoc/apps/drs/report.py +401 -0
- esgvoc/apps/drs/validator.py +332 -0
- esgvoc/cli/config.py +3 -0
- esgvoc/cli/drs.py +238 -0
- esgvoc/cli/get.py +1 -1
- esgvoc/cli/main.py +4 -3
- esgvoc/cli/status.py +13 -1
- esgvoc/cli/valid.py +1 -5
- esgvoc/core/db/models/mixins.py +7 -0
- esgvoc/core/db/models/project.py +3 -8
- esgvoc/core/db/project_ingestion.py +4 -1
- esgvoc/core/db/universe_ingestion.py +3 -3
- esgvoc/core/service/settings.py +17 -8
- esgvoc/core/service/settings.toml +11 -6
- esgvoc/core/service/settings_default.toml +11 -14
- esgvoc/core/service/state.py +19 -12
- esgvoc-0.2.1.dist-info/METADATA +58 -0
- {esgvoc-0.1.2.dist-info → esgvoc-0.2.1.dist-info}/RECORD +33 -26
- esgvoc-0.2.1.dist-info/licenses/LICENSE.txt +519 -0
- esgvoc/apps/drs/models.py +0 -43
- esgvoc/apps/drs/parser.py +0 -27
- esgvoc-0.1.2.dist-info/METADATA +0 -54
- {esgvoc-0.1.2.dist-info → esgvoc-0.2.1.dist-info}/WHEEL +0 -0
- {esgvoc-0.1.2.dist-info → esgvoc-0.2.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
from pydantic import BaseModel, computed_field
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Any, Mapping, Iterable, Protocol, ClassVar
|
|
4
|
+
from esgvoc.api.project_specs import DrsType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ParserIssueVisitor(Protocol):
    """
    Structural interface expected from a visitor of DRS parser issues.

    It declares one ``visit_*`` method per kind of parser issue.
    """

    def visit_space_issue(self, issue: "Space") -> Any:
        """Handle a space issue."""
        ...

    def visit_unparsable_issue(self, issue: "Unparsable") -> Any:
        """Handle an unparsable issue."""
        ...

    def visit_extra_separator_issue(self, issue: "ExtraSeparator") -> Any:
        """Handle an extra separator issue."""
        ...

    def visit_extra_char_issue(self, issue: "ExtraChar") -> Any:
        """Handle an extra char issue."""
        ...

    def visit_blank_token_issue(self, issue: "BlankToken") -> Any:
        """Handle a blank token issue."""
        ...
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ValidationIssueVisitor(Protocol):
    """
    Structural interface expected from a visitor of DRS validation issues.

    It declares one ``visit_*`` method per kind of validation issue.
    """

    def visit_filename_extension_issue(self, issue: "FileNameExtensionIssue") -> Any:
        """Handle a file name extension issue."""
        ...

    def visit_invalid_token_issue(self, issue: "InvalidToken") -> Any:
        """Handle an invalid token issue."""
        ...

    def visit_extra_token_issue(self, issue: "ExtraToken") -> Any:
        """Handle an extra token issue."""
        ...

    def visit_missing_token_issue(self, issue: "MissingToken") -> Any:
        """Handle a missing token issue."""
        ...
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class GeneratorIssueVisitor(Protocol):
    """
    Structural interface expected from a visitor of DRS generator issues.

    It declares one ``visit_*`` method per kind of generator issue.
    """

    def visit_invalid_token_issue(self, issue: "InvalidToken") -> Any:
        """Handle an invalid token issue."""
        ...

    def visit_missing_token_issue(self, issue: "MissingToken") -> Any:
        """Handle a missing token issue."""
        ...

    def visit_too_many_tokens_collection_issue(self, issue: "TooManyTokensCollection") -> Any:
        """Handle a too many tokens collection issue."""
        ...

    def visit_conflicting_collections_issue(self, issue: "ConflictingCollections") -> Any:
        """Handle a conflicting collections issue."""
        ...

    def visit_assign_token_issue(self, issue: "AssignedToken") -> Any:
        """Handle an assign token issue."""
        ...
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class DrsIssue(BaseModel, ABC):
    """
    Abstract base class shared by all the DRS issues.
    """

    @abstractmethod
    def accept(self, visitor) -> Any:
        """
        Accept a DRS issue visitor.

        Concrete issues implement this method by calling the visitor method
        that matches their own kind.

        :param visitor: The DRS issue visitor.
        :return: Depending on the visitor.
        :rtype: Any
        """
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ParserIssue(DrsIssue):
    """
    Abstract base class of the issues raised while parsing a DRS expression.
    """

    column: int | None = None
    """The column of the faulty characters (``None`` by default)."""

    @abstractmethod
    def accept(self, visitor: ParserIssueVisitor) -> Any:
        """
        Accept a DRS parser issue visitor.

        :param visitor: The DRS parser issue visitor.
        :type visitor: ParserIssueVisitor
        :return: Depending on the visitor.
        :rtype: Any
        """
|
|
105
|
+
|
|
106
|
+
class Space(ParserIssue):
    """
    Parser issue: unnecessary space[s] at the beginning or end of the DRS expression.

    Note: `column` is `None`.
    """

    def accept(self, visitor: ParserIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_space_issue`` and return its result."""
        return visitor.visit_space_issue(self)

    def __str__(self) -> str:
        """Return the human readable message of this issue."""
        message = "expression is surrounded by white space[s]"
        return message

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class Unparsable(ParserIssue):
    """
    Parser issue: the DRS expression is not compliant and cannot be parsed.

    Note: `column` is `None`.
    """

    expected_drs_type: DrsType
    """The DRS type the expression was expected to be (directory, file name or dataset id)."""

    def accept(self, visitor: ParserIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_unparsable_issue`` and return its result."""
        return visitor.visit_unparsable_issue(self)

    def __str__(self) -> str:
        """Return the human readable message of this issue."""
        message = "unable to parse this expression"
        return message

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class ExtraSeparator(ParserIssue):
    """
    Parser issue: multiple occurrences of the separator in the DRS expression.
    """

    def accept(self, visitor: ParserIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_extra_separator_issue`` and return its result."""
        return visitor.visit_extra_separator_issue(self)

    def __str__(self) -> str:
        """Return the human readable message, including the faulty column."""
        where = self.column
        return f"extra separator(s) at column {where}"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class ExtraChar(ParserIssue):
    """
    Parser issue: extra characters at the end of the DRS expression.
    """

    def accept(self, visitor: ParserIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_extra_char_issue`` and return its result."""
        return visitor.visit_extra_char_issue(self)

    def __str__(self) -> str:
        """Return the human readable message, including the faulty column."""
        where = self.column
        return f"extra character(s) at column {where}"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class BlankToken(ParserIssue):
    """
    Parser issue: blank token in the DRS expression (i.e., space[s] surrounded by separators).
    """

    def accept(self, visitor: ParserIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_blank_token_issue`` and return its result."""
        return visitor.visit_blank_token_issue(self)

    def __str__(self) -> str:
        """Return the human readable message, including the faulty column."""
        where = self.column
        return f"blank token at column {where}"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class ValidationIssue(DrsIssue):
    """
    Abstract base class of the DRS validation issues.
    """

    @abstractmethod
    def accept(self, visitor: ValidationIssueVisitor) -> Any:
        """
        Accept a DRS validation issue visitor.

        :param visitor: The DRS validation issue visitor.
        :type visitor: ValidationIssueVisitor
        :return: Depending on the visitor.
        :rtype: Any
        """
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class FileNameExtensionIssue(ValidationIssue):
    """
    Represents a problem on the given file name extension (missing or not compliant).
    """

    expected_extension: str
    """The expected file name extension."""

    def accept(self, visitor: ValidationIssueVisitor) -> Any:
        """
        Accept a DRS validation issue visitor.

        :param visitor: The DRS validation issue visitor.
        :type visitor: ValidationIssueVisitor
        :return: The result of ``visitor.visit_filename_extension_issue``.
        :rtype: Any
        """
        return visitor.visit_filename_extension_issue(self)

    def __str__(self) -> str:
        """Return the human readable message, quoting the expected extension."""
        return f"filename extension missing or not compliant with '{self.expected_extension}'"

    def __repr__(self) -> str:
        # Every other concrete issue class in this module renders its repr
        # as its str; do the same here so that collections of issues print
        # uniformly.
        return self.__str__()
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class TokenIssue(ValidationIssue):
    """
    Base class of the validation issues that concern one token of a DRS expression.
    """

    token: str
    """The faulty token."""

    token_position: int
    """The position of the faulty token (the part position, not the column of the characters)."""
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class GeneratorIssue(DrsIssue):
    """
    Abstract base class of the DRS generator issues.
    """

    @abstractmethod
    def accept(self, visitor: GeneratorIssueVisitor) -> Any:
        """
        Accept a DRS generator issue visitor.

        :param visitor: The DRS generator issue visitor.
        :type visitor: GeneratorIssueVisitor
        :return: Depending on the visitor.
        :rtype: Any
        """
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class InvalidToken(TokenIssue, GeneratorIssue):
    """
    Issue raised when a token is invalid against a collection or a constant
    part of a DRS specification.
    """

    collection_id_or_constant_value: str
    """The collection id or the constant part of a DRS specification."""

    def accept(self, visitor: ValidationIssueVisitor | GeneratorIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_invalid_token_issue`` and return its result."""
        return visitor.visit_invalid_token_issue(self)

    def __str__(self) -> str:
        """Return the human readable message of this issue."""
        expected = self.collection_id_or_constant_value
        return (
            f"token '{self.token}' not compliant with {expected}"
            f" at position {self.token_position}"
        )

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class ExtraToken(TokenIssue):
    """
    Issue raised for an extra token at the end of the given DRS expression.

    Either all the parts of the DRS specification have been processed and this
    token is not necessary (`collection_id` is `None`), or the token has been
    invalidated by an optional collection part of the DRS specification
    (`collection_id` is set).
    """

    collection_id: str | None
    """The optional collection id or `None`"""

    def accept(self, visitor: ValidationIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_extra_token_issue`` and return its result."""
        return visitor.visit_extra_token_issue(self)

    def __str__(self) -> str:
        """Return the human readable message of this issue."""
        chunks = [f"extra token {self.token}"]
        # The collection is mentioned only when one is set (truthy).
        if self.collection_id:
            chunks.append(f" invalidated by the optional collection {self.collection_id}")
        chunks.append(f" at position {self.token_position}")
        return "".join(chunks)

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class MissingToken(ValidationIssue, GeneratorIssue):
    """
    Issue raised when a token is missing for a collection part of the DRS specification.
    """

    collection_id: str
    """The collection id."""

    collection_position: int
    """The collection part position (not the column of the characters)."""

    def accept(self, visitor: ValidationIssueVisitor | GeneratorIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_missing_token_issue`` and return its result."""
        return visitor.visit_missing_token_issue(self)

    def __str__(self) -> str:
        """Return the human readable message of this issue."""
        collection, position = self.collection_id, self.collection_position
        return f"missing token for {collection} at position {position}"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class TooManyTokensCollection(GeneratorIssue):
    """
    Issue raised while inferring a collection - token mapping during the
    generation of a DRS expression based on a bag of tokens: more than one
    token is able to match this collection, and the generator is unable to
    choose from these tokens.
    """

    collection_id: str
    """The collection id."""

    tokens: list[str]
    """The faulty tokens."""

    def accept(self, visitor: GeneratorIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_too_many_tokens_collection_issue`` and return its result."""
        return visitor.visit_too_many_tokens_collection_issue(self)

    def __str__(self) -> str:
        """Return the human readable message, listing the competing tokens."""
        listed_tokens = ", ".join(self.tokens)
        return f"collection {self.collection_id} has more than one token ({listed_tokens})"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class ConflictingCollections(GeneratorIssue):
    """
    Issue raised while inferring a collection - token mapping during the
    generation of a DRS expression based on a bag of tokens: these collections
    share the very same tokens, and the generator is unable to choose which
    token goes to which collection.
    """

    collection_ids: list[str]
    """The ids of the collections."""

    tokens: list[str]
    """The shared tokens."""

    def accept(self, visitor: GeneratorIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_conflicting_collections_issue`` and return its result."""
        return visitor.visit_conflicting_collections_issue(self)

    def __str__(self) -> str:
        """Return the human readable message, listing the collections and the tokens."""
        listed_collections = ", ".join(self.collection_ids)
        listed_tokens = ", ".join(self.tokens)
        return (
            f"collections {listed_collections} are competing"
            f" for the same token(s) {listed_tokens}"
        )

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
class AssignedToken(GeneratorIssue):
    """
    Represents a decision of the generator to assign this token to the
    collection, a decision that may not be relevant.
    """

    collection_id: str
    """The collection id."""

    token: str
    """The token."""

    def accept(self, visitor: GeneratorIssueVisitor) -> Any:
        """Dispatch to ``visitor.visit_assign_token_issue`` and return its result."""
        return visitor.visit_assign_token_issue(self)

    def __str__(self) -> str:
        """Return the human readable message of this issue."""
        return f"assign token {self.token} for collection {self.collection_id}"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class DrsReport(BaseModel):
    """
    Base class of the reports produced by the DRS applications.

    The length of a report is its number of errors, and a report is truthy
    when it has no error.
    """

    project_id: str
    """The project id associated to the result of the DRS application"""

    type: DrsType
    """The type of the DRS"""

    errors: list[DrsIssue]
    """A list of DRS issues that are considered as errors."""

    warnings: list[DrsIssue]
    """A list of DRS issues that are considered as warnings."""

    @computed_field  # type: ignore
    @property
    def nb_errors(self) -> int:
        """The number of errors."""
        if not self.errors:
            return 0
        return len(self.errors)

    @computed_field  # type: ignore
    @property
    def nb_warnings(self) -> int:
        """The number of warnings."""
        if not self.warnings:
            return 0
        return len(self.warnings)

    @computed_field  # type: ignore
    @property
    def validated(self) -> bool:
        """The correctness of the result of the DRS application."""
        return not self.errors

    def __len__(self) -> int:
        """Return the number of errors."""
        return self.nb_errors

    def __bool__(self) -> bool:
        """Return ``True`` when the report has no error."""
        return self.validated
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
class DrsValidationReport(DrsReport):
    """
    Report produced by the validation of a DRS expression.
    """

    expression: str
    """The DRS expression being checked"""

    def __str__(self) -> str:
        """Summarize the expression with its numbers of errors and warnings."""
        return (
            f"'{self.expression}' has {self.nb_errors} error(s) and "
            f"{self.nb_warnings} warning(s)"
        )

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class DrsGeneratorReport(DrsReport):
    """
    Report produced by the generation of a DRS expression.
    """

    MISSING_TAG: ClassVar[str] = '[MISSING]'
    """Tag used in the DRS generated expression to replace a missing term."""

    INVALID_TAG: ClassVar[str] = '[INVALID]'
    """Tag used in the DRS generated expression to replace an invalid term."""

    given_mapping_or_bag_of_tokens: Mapping | Iterable
    """The mapping or the bag of tokens given."""

    mapping_used: Mapping
    """The mapping inferred from the given bag of tokens (same mapping otherwise)."""

    generated_drs_expression: str
    """The generated DRS expression with possible tags to replace missing or invalid tokens"""

    def __str__(self) -> str:
        """Summarize the generated expression with its numbers of errors and warnings."""
        return (
            f"'{self.generated_drs_expression}' has {self.nb_errors} error(s) and "
            f"{self.nb_warnings} warning(s)"
        )

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return str(self)
|
|
401
|
+
|