pysdmx 1.8.1__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysdmx/__extras_check.py +15 -1
- pysdmx/__init__.py +1 -1
- pysdmx/api/fmr/__init__.py +3 -2
- pysdmx/io/input_processor.py +9 -6
- pysdmx/io/json/fusion/messages/category.py +69 -41
- pysdmx/io/json/sdmxjson2/messages/__init__.py +4 -0
- pysdmx/io/json/sdmxjson2/messages/category.py +76 -43
- pysdmx/io/json/sdmxjson2/messages/code.py +16 -6
- pysdmx/io/json/sdmxjson2/messages/constraint.py +235 -16
- pysdmx/io/json/sdmxjson2/messages/core.py +2 -1
- pysdmx/io/json/sdmxjson2/messages/dsd.py +35 -7
- pysdmx/io/json/sdmxjson2/messages/map.py +5 -4
- pysdmx/io/json/sdmxjson2/messages/metadataflow.py +1 -0
- pysdmx/io/json/sdmxjson2/messages/msd.py +18 -10
- pysdmx/io/json/sdmxjson2/messages/schema.py +2 -2
- pysdmx/io/json/sdmxjson2/messages/structure.py +81 -44
- pysdmx/io/json/sdmxjson2/messages/vtl.py +13 -9
- pysdmx/io/json/sdmxjson2/reader/doc_validation.py +112 -0
- pysdmx/io/json/sdmxjson2/reader/metadata.py +8 -1
- pysdmx/io/json/sdmxjson2/reader/structure.py +9 -2
- pysdmx/io/reader.py +18 -4
- pysdmx/io/xml/__data_aux.py +9 -4
- pysdmx/io/xml/__parse_xml.py +2 -0
- pysdmx/io/xml/__structure_aux_reader.py +70 -0
- pysdmx/io/xml/__structure_aux_writer.py +63 -9
- pysdmx/io/xml/__tokens.py +3 -0
- pysdmx/io/xml/__write_aux.py +35 -30
- pysdmx/io/xml/header.py +48 -35
- pysdmx/model/__base.py +47 -2
- pysdmx/model/__init__.py +18 -0
- pysdmx/model/category.py +23 -1
- pysdmx/model/constraint.py +69 -0
- pysdmx/model/message.py +97 -72
- pysdmx/toolkit/vtl/__init__.py +10 -1
- pysdmx/toolkit/vtl/_validations.py +8 -12
- pysdmx/toolkit/vtl/convert.py +333 -0
- pysdmx/toolkit/vtl/script_generation.py +1 -1
- pysdmx/util/_model_utils.py +40 -3
- {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/METADATA +6 -3
- {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/RECORD +42 -39
- {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/WHEEL +0 -0
- {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/licenses/LICENSE +0 -0
pysdmx/model/message.py
CHANGED
|
@@ -19,10 +19,11 @@ from typing import Any, Dict, List, Optional, Sequence, Type, Union
|
|
|
19
19
|
from msgspec import Struct
|
|
20
20
|
|
|
21
21
|
from pysdmx.errors import Invalid, NotFound
|
|
22
|
-
from pysdmx.model.__base import
|
|
22
|
+
from pysdmx.model.__base import MaintainableArtefact, Organisation
|
|
23
23
|
from pysdmx.model.category import Categorisation, CategoryScheme
|
|
24
24
|
from pysdmx.model.code import Codelist, Hierarchy, HierarchyAssociation
|
|
25
25
|
from pysdmx.model.concept import ConceptScheme
|
|
26
|
+
from pysdmx.model.constraint import DataConstraint
|
|
26
27
|
from pysdmx.model.dataflow import (
|
|
27
28
|
Dataflow,
|
|
28
29
|
DataStructureDefinition,
|
|
@@ -87,7 +88,7 @@ class Header(Struct, repr_omit_defaults=True, kw_only=True):
|
|
|
87
88
|
test: bool = False
|
|
88
89
|
prepared: datetime = datetime.now(timezone.utc)
|
|
89
90
|
sender: Organisation = Organisation(id="ZZZ")
|
|
90
|
-
receiver:
|
|
91
|
+
receiver: Sequence[Organisation] = ()
|
|
91
92
|
source: Optional[str] = None
|
|
92
93
|
dataset_action: Optional[ActionType] = None
|
|
93
94
|
structure: Optional[Dict[str, str]] = None
|
|
@@ -99,7 +100,13 @@ class Header(Struct, repr_omit_defaults=True, kw_only=True):
|
|
|
99
100
|
self.sender = Organisation(id=self.sender)
|
|
100
101
|
|
|
101
102
|
if isinstance(self.receiver, str):
|
|
102
|
-
self.receiver = Organisation(id=self.receiver)
|
|
103
|
+
self.receiver = [Organisation(id=self.receiver)]
|
|
104
|
+
|
|
105
|
+
if isinstance(self.receiver, Organisation):
|
|
106
|
+
self.receiver = [self.receiver]
|
|
107
|
+
|
|
108
|
+
if self.receiver is None:
|
|
109
|
+
self.receiver = ()
|
|
103
110
|
|
|
104
111
|
def __str__(self) -> str:
|
|
105
112
|
"""Custom string representation without the class name."""
|
|
@@ -171,7 +178,9 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
171
178
|
attrs.append(f"{attr}={repr(value)}")
|
|
172
179
|
return f"{self.__class__.__name__}({', '.join(attrs)})"
|
|
173
180
|
|
|
174
|
-
|
|
181
|
+
# Returns MaintainableArtefacts only, but mypy complains.
|
|
182
|
+
# As it is an internal method, it's acceptable.
|
|
183
|
+
def __get_elements(self, type_: Type[MaintainableArtefact]) -> List[Any]:
|
|
175
184
|
"""Returns a list of elements of a specific type."""
|
|
176
185
|
if self.structures is None:
|
|
177
186
|
raise NotFound(
|
|
@@ -180,6 +189,8 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
180
189
|
structures = [e for e in self.structures if isinstance(e, type_)]
|
|
181
190
|
return structures
|
|
182
191
|
|
|
192
|
+
# Returns Codelist or ValueList only, but mypy complains.
|
|
193
|
+
# As it is an internal method, it's acceptable.
|
|
183
194
|
def __get_enumerations(
|
|
184
195
|
self, type_: Type[Any], is_vl: bool = False
|
|
185
196
|
) -> List[Any]:
|
|
@@ -188,9 +199,11 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
188
199
|
t = "valuelist" if is_vl else "codelist"
|
|
189
200
|
return [e for e in enums if e.sdmx_type == t]
|
|
190
201
|
|
|
202
|
+
# Returns MaintainableArtefacts only,
|
|
203
|
+
# but mypy complains. As it is an internal method, it's acceptable.
|
|
191
204
|
def __get_single_structure(
|
|
192
205
|
self,
|
|
193
|
-
type_: Type[
|
|
206
|
+
type_: Type[MaintainableArtefact],
|
|
194
207
|
short_urn: str,
|
|
195
208
|
) -> Any:
|
|
196
209
|
"""Returns a specific element from content."""
|
|
@@ -208,10 +221,20 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
208
221
|
"Could not find the requested element.",
|
|
209
222
|
)
|
|
210
223
|
|
|
224
|
+
# Return collection of a certain type
|
|
225
|
+
|
|
211
226
|
def get_agency_schemes(self) -> List[AgencyScheme]:
    """Return every AgencyScheme found in the message.

    Returns:
        The message structures that are AgencyScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(AgencyScheme)
    return matches
|
|
214
229
|
|
|
230
|
+
def get_categorisations(self) -> List[Categorisation]:
    """Return every Categorisation found in the message.

    Returns:
        The message structures that are Categorisation instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(Categorisation)
    return matches
|
|
233
|
+
|
|
234
|
+
def get_category_schemes(self) -> List[CategoryScheme]:
    """Return every CategoryScheme found in the message.

    Returns:
        The message structures that are CategoryScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(CategoryScheme)
    return matches
|
|
237
|
+
|
|
215
238
|
def get_codelists(self) -> List[Codelist]:
    """Return the Codelists found in the message.

    Only enumerations whose ``sdmx_type`` is ``"codelist"`` are kept, so
    value lists are excluded.

    Returns:
        The matching Codelist objects.
    """
    codelists = self.__get_enumerations(Codelist, False)
    return codelists
|
|
@@ -220,6 +243,10 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
220
243
|
"""Returns the Concept Schemes."""
|
|
221
244
|
return self.__get_elements(ConceptScheme)
|
|
222
245
|
|
|
246
|
+
def get_custom_type_schemes(self) -> List[CustomTypeScheme]:
    """Return every CustomTypeScheme found in the message.

    Returns:
        The message structures that are CustomTypeScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(CustomTypeScheme)
    return matches
|
|
249
|
+
|
|
223
250
|
def get_data_structure_definitions(
|
|
224
251
|
self,
|
|
225
252
|
) -> List[DataStructureDefinition]:
|
|
@@ -230,53 +257,13 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
230
257
|
"""Returns the Dataflows."""
|
|
231
258
|
return self.__get_elements(Dataflow)
|
|
232
259
|
|
|
233
|
-
def
|
|
234
|
-
"""Returns the
|
|
235
|
-
return self.__get_elements(
|
|
236
|
-
|
|
237
|
-
def get_organisation_scheme(self, short_urn: str) -> AgencyScheme:
|
|
238
|
-
"""Returns a specific OrganisationScheme."""
|
|
239
|
-
return self.__get_single_structure(AgencyScheme, short_urn)
|
|
240
|
-
|
|
241
|
-
def get_codelist(self, short_urn: str) -> Codelist:
|
|
242
|
-
"""Returns a specific Codelist."""
|
|
243
|
-
return self.__get_single_structure(Codelist, short_urn)
|
|
244
|
-
|
|
245
|
-
def get_concept_scheme(self, short_urn: str) -> ConceptScheme:
|
|
246
|
-
"""Returns a specific Concept Scheme."""
|
|
247
|
-
return self.__get_single_structure(ConceptScheme, short_urn)
|
|
248
|
-
|
|
249
|
-
def get_data_structure_definition(
|
|
250
|
-
self, short_urn: str
|
|
251
|
-
) -> DataStructureDefinition:
|
|
252
|
-
"""Returns a specific DataStructureDefinition."""
|
|
253
|
-
return self.__get_single_structure(DataStructureDefinition, short_urn)
|
|
260
|
+
def get_data_constraints(self) -> List[DataConstraint]:
    """Return every DataConstraint found in the message.

    Returns:
        The message structures that are DataConstraint instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(DataConstraint)
    return matches
|
|
254
263
|
|
|
255
|
-
def
|
|
256
|
-
"""Returns
|
|
257
|
-
return self.
|
|
258
|
-
|
|
259
|
-
def get_transformation_schemes(self) -> List[TransformationScheme]:
|
|
260
|
-
"""Returns the TransformationSchemes."""
|
|
261
|
-
return self.__get_elements(TransformationScheme)
|
|
262
|
-
|
|
263
|
-
def get_user_defined_operator_schemes(
|
|
264
|
-
self,
|
|
265
|
-
) -> List[UserDefinedOperatorScheme]:
|
|
266
|
-
"""Returns the UserDefinedOperatorSchemes."""
|
|
267
|
-
return self.__get_elements(UserDefinedOperatorScheme)
|
|
268
|
-
|
|
269
|
-
def get_ruleset_schemes(self) -> List[RulesetScheme]:
|
|
270
|
-
"""Returns the RulesetSchemes."""
|
|
271
|
-
return self.__get_elements(RulesetScheme)
|
|
272
|
-
|
|
273
|
-
def get_category_schemes(self) -> List[CategoryScheme]:
|
|
274
|
-
"""Returns the CategorySchemes."""
|
|
275
|
-
return self.__get_elements(CategoryScheme)
|
|
276
|
-
|
|
277
|
-
def get_value_lists(self) -> List[Codelist]:
|
|
278
|
-
"""Returns the Codelists."""
|
|
279
|
-
return self.__get_enumerations(Codelist, True)
|
|
264
|
+
def get_data_provider_schemes(self) -> List[DataProviderScheme]:
    """Return every DataProviderScheme found in the message.

    Returns:
        The message structures that are DataProviderScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(DataProviderScheme)
    return matches
|
|
280
267
|
|
|
281
268
|
def get_hierarchies(self) -> List[Hierarchy]:
|
|
282
269
|
"""Returns the HierarchyCodelists."""
|
|
@@ -286,13 +273,9 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
286
273
|
"""Returns the HierarchyAssociations."""
|
|
287
274
|
return self.__get_elements(HierarchyAssociation)
|
|
288
275
|
|
|
289
|
-
def
|
|
290
|
-
"""Returns the
|
|
291
|
-
return self.__get_elements(
|
|
292
|
-
|
|
293
|
-
def get_provision_agreements(self) -> List[ProvisionAgreement]:
|
|
294
|
-
"""Returns the ProvisionAgreements."""
|
|
295
|
-
return self.__get_elements(ProvisionAgreement)
|
|
276
|
+
def get_metadataflows(self) -> List[Metadataflow]:
    """Returns the Metadataflows.

    Returns:
        The message structures that are Metadataflow instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    return self.__get_elements(Metadataflow)
|
|
296
279
|
|
|
297
280
|
def get_metadata_provider_schemes(self) -> List[MetadataProviderScheme]:
|
|
298
281
|
"""Returns the MetadataProviderSchemes."""
|
|
@@ -308,9 +291,15 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
308
291
|
"""Returns the MetadataStructures."""
|
|
309
292
|
return self.__get_elements(MetadataStructure)
|
|
310
293
|
|
|
311
|
-
def
|
|
312
|
-
|
|
313
|
-
|
|
294
|
+
def get_name_personalisation_schemes(
    self,
) -> List[NamePersonalisationScheme]:
    """Return every NamePersonalisationScheme found in the message.

    Returns:
        The message structures that are NamePersonalisationScheme
        instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(NamePersonalisationScheme)
    return matches
|
|
299
|
+
|
|
300
|
+
def get_provision_agreements(self) -> List[ProvisionAgreement]:
    """Return every ProvisionAgreement found in the message.

    Returns:
        The message structures that are ProvisionAgreement instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(ProvisionAgreement)
    return matches
|
|
314
303
|
|
|
315
304
|
def get_representation_maps(
|
|
316
305
|
self,
|
|
@@ -321,23 +310,59 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
|
|
|
321
310
|
out.extend(self.__get_elements(MultiRepresentationMap))
|
|
322
311
|
return out
|
|
323
312
|
|
|
324
|
-
def
|
|
325
|
-
"""Returns the
|
|
326
|
-
return self.__get_elements(
|
|
313
|
+
def get_ruleset_schemes(self) -> List[RulesetScheme]:
    """Return every RulesetScheme found in the message.

    Returns:
        The message structures that are RulesetScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(RulesetScheme)
    return matches
|
|
327
316
|
|
|
328
|
-
def
|
|
329
|
-
"""Returns the
|
|
330
|
-
return self.__get_elements(
|
|
317
|
+
def get_structure_maps(self) -> List[StructureMap]:
    """Return every StructureMap found in the message.

    Returns:
        The message structures that are StructureMap instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(StructureMap)
    return matches
|
|
320
|
+
|
|
321
|
+
def get_transformation_schemes(self) -> List[TransformationScheme]:
    """Return every TransformationScheme found in the message.

    Returns:
        The message structures that are TransformationScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(TransformationScheme)
    return matches
|
|
324
|
+
|
|
325
|
+
def get_user_defined_operator_schemes(
    self,
) -> List[UserDefinedOperatorScheme]:
    """Return every UserDefinedOperatorScheme found in the message.

    Returns:
        The message structures that are UserDefinedOperatorScheme
        instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(UserDefinedOperatorScheme)
    return matches
|
|
330
|
+
|
|
331
|
+
def get_value_lists(self) -> List[Codelist]:
    """Returns the ValueLists.

    Value lists are modelled with the Codelist class; only enumerations
    whose ``sdmx_type`` is ``"valuelist"`` are returned.

    Returns:
        The Codelist objects that represent value lists.
    """
    return self.__get_enumerations(Codelist, True)
|
|
331
334
|
|
|
332
335
|
def get_vtl_mapping_schemes(self) -> List[VtlMappingScheme]:
    """Return every VtlMappingScheme found in the message.

    Returns:
        The message structures that are VtlMappingScheme instances.

    Raises:
        NotFound: If the message carries no structures.
    """
    matches = self.__get_elements(VtlMappingScheme)
    return matches
|
|
335
338
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
) ->
|
|
339
|
-
"""Returns
|
|
340
|
-
return self.
|
|
339
|
+
# Return individual items
|
|
340
|
+
|
|
341
|
+
def get_codelist(self, short_urn: str) -> Codelist:
    """Look up a single Codelist by its short URN.

    Args:
        short_urn: The short URN identifying the requested Codelist.

    Returns:
        The matching Codelist.
    """
    codelist = self.__get_single_structure(Codelist, short_urn)
    return codelist
|
|
344
|
+
|
|
345
|
+
def get_concept_scheme(self, short_urn: str) -> ConceptScheme:
    """Look up a single ConceptScheme by its short URN.

    Args:
        short_urn: The short URN identifying the requested ConceptScheme.

    Returns:
        The matching ConceptScheme.
    """
    concept_scheme = self.__get_single_structure(ConceptScheme, short_urn)
    return concept_scheme
|
|
348
|
+
|
|
349
|
+
def get_dataflow(self, short_urn: str) -> Dataflow:
    """Look up a single Dataflow by its short URN.

    Args:
        short_urn: The short URN identifying the requested Dataflow.

    Returns:
        The matching Dataflow.
    """
    dataflow = self.__get_single_structure(Dataflow, short_urn)
    return dataflow
|
|
352
|
+
|
|
353
|
+
def get_data_structure_definition(
    self, short_urn: str
) -> DataStructureDefinition:
    """Look up a single DataStructureDefinition by its short URN.

    Args:
        short_urn: The short URN identifying the requested
            DataStructureDefinition.

    Returns:
        The matching DataStructureDefinition.
    """
    dsd = self.__get_single_structure(DataStructureDefinition, short_urn)
    return dsd
|
|
358
|
+
|
|
359
|
+
def get_organisation_scheme(self, short_urn: str) -> AgencyScheme:
    """Look up a single AgencyScheme by its short URN.

    Args:
        short_urn: The short URN identifying the requested AgencyScheme.

    Returns:
        The matching AgencyScheme.
    """
    agency_scheme = self.__get_single_structure(AgencyScheme, short_urn)
    return agency_scheme
|
|
362
|
+
|
|
363
|
+
def get_provision_agreement(self, short_urn: str) -> ProvisionAgreement:
    """Look up a single ProvisionAgreement by its short URN.

    Args:
        short_urn: The short URN identifying the requested
            ProvisionAgreement.

    Returns:
        The matching ProvisionAgreement.
    """
    agreement = self.__get_single_structure(ProvisionAgreement, short_urn)
    return agreement
|
|
341
366
|
|
|
342
367
|
|
|
343
368
|
class MetadataMessage(Struct, frozen=True):
|
pysdmx/toolkit/vtl/__init__.py
CHANGED
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
"""VTL toolkit for PySDMX."""
|
|
2
2
|
|
|
3
|
+
from pysdmx.toolkit.vtl.convert import (
|
|
4
|
+
convert_dataset_to_sdmx,
|
|
5
|
+
convert_dataset_to_vtl,
|
|
6
|
+
)
|
|
3
7
|
from pysdmx.toolkit.vtl.script_generation import generate_vtl_script
|
|
4
8
|
from pysdmx.toolkit.vtl.validation import model_validations
|
|
5
9
|
|
|
6
|
-
__all__ = [
|
|
10
|
+
# Public API of the VTL toolkit: every name imported above is re-exported.
__all__ = [
    "model_validations",
    "generate_vtl_script",
    "convert_dataset_to_vtl",
    "convert_dataset_to_sdmx",
]
|
|
@@ -1,13 +1,9 @@
|
|
|
1
1
|
"""Private module for VTL validation functions."""
|
|
2
2
|
|
|
3
|
-
from vtlengine.API import create_ast
|
|
4
|
-
from vtlengine.AST import
|
|
5
|
-
DPRuleset as ASTDPRuleset,
|
|
6
|
-
)
|
|
3
|
+
from vtlengine.API import create_ast
|
|
4
|
+
from vtlengine.AST import DPRuleset as ASTDPRuleset
|
|
7
5
|
from vtlengine.AST import HRuleset as ASTHRuleset
|
|
8
|
-
from vtlengine.AST import
|
|
9
|
-
Operator as ASTOperator,
|
|
10
|
-
)
|
|
6
|
+
from vtlengine.AST import Operator as ASTOperator
|
|
11
7
|
|
|
12
8
|
from pysdmx.errors import Invalid
|
|
13
9
|
from pysdmx.model import Reference
|
|
@@ -37,14 +33,14 @@ def _ruleset_validation(ruleset: Ruleset) -> None:
|
|
|
37
33
|
ast.children[0], ASTDPRuleset
|
|
38
34
|
):
|
|
39
35
|
raise Invalid("Ruleset type does not match the definition")
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
36
|
+
|
|
37
|
+
child = ast.children[0]
|
|
38
|
+
signature_type = getattr(child, "signature_type", None)
|
|
39
|
+
if ruleset.ruleset_scope == "variable" and signature_type != "variable":
|
|
44
40
|
raise Invalid("Ruleset scope does not match the definition")
|
|
45
41
|
if (
|
|
46
42
|
ruleset.ruleset_scope == "valuedomain"
|
|
47
|
-
and
|
|
43
|
+
and signature_type != "valuedomain"
|
|
48
44
|
):
|
|
49
45
|
raise Invalid("Ruleset scope does not match the definition")
|
|
50
46
|
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""Conversions between pysdmx PandasDataset and vtlengine Dataset."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, Optional, Type, Union
|
|
4
|
+
|
|
5
|
+
from vtlengine.API import load_datasets # type: ignore[attr-defined]
|
|
6
|
+
from vtlengine.API._InternalApi import to_vtl_json
|
|
7
|
+
from vtlengine.DataTypes import (
|
|
8
|
+
Boolean,
|
|
9
|
+
Date,
|
|
10
|
+
Duration,
|
|
11
|
+
Integer,
|
|
12
|
+
Number,
|
|
13
|
+
ScalarType,
|
|
14
|
+
String,
|
|
15
|
+
TimeInterval,
|
|
16
|
+
TimePeriod,
|
|
17
|
+
)
|
|
18
|
+
from vtlengine.Model import Dataset as VTLengineDataset
|
|
19
|
+
from vtlengine.Model import Role as VTLRole
|
|
20
|
+
|
|
21
|
+
from pysdmx.errors import Invalid
|
|
22
|
+
from pysdmx.io.pd import PandasDataset
|
|
23
|
+
from pysdmx.model import Component, Components, Concept, Reference
|
|
24
|
+
from pysdmx.model.concept import DataType
|
|
25
|
+
from pysdmx.model.dataflow import Role, Schema
|
|
26
|
+
|
|
27
|
+
# VTL to SDMX type mapping.
# Keys are vtlengine scalar type classes (not instances); the lookup helper
# in this module resolves an instance to its class before consulting the map.
VTL_TO_SDMX_TYPE_MAP: Dict[Type[ScalarType], DataType] = {
    String: DataType.STRING,
    Integer: DataType.INTEGER,
    Number: DataType.DOUBLE,
    Boolean: DataType.BOOLEAN,
    Date: DataType.DATE,
    TimePeriod: DataType.PERIOD,
    TimeInterval: DataType.TIME,
    Duration: DataType.DURATION,
}

# Role mapping (VTL role -> SDMX role).
# ViralAttribute is not yet supported as a separate role in VTL 1.2.2,
# so it is mapped to Attribute following vtlengine's behavior. It is keyed
# by a plain string rather than a VTLRole member, hence the type-ignore.
VTL_TO_SDMX_ROLE_MAP: Dict[VTLRole, Role] = {
    VTLRole.IDENTIFIER: Role.DIMENSION,
    VTLRole.MEASURE: Role.MEASURE,
    VTLRole.ATTRIBUTE: Role.ATTRIBUTE,
    "ViralAttribute": Role.ATTRIBUTE,  # type: ignore[dict-item]
}

# Reference.sdmx_type values accepted when a Schema has to be generated
# from a Reference in convert_dataset_to_sdmx.
VALID_SDMX_TYPES = {"DataStructure", "Dataflow", "ProvisionAgreement"}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def convert_dataset_to_vtl(
    dataset: PandasDataset, vtl_dataset_name: str
) -> VTLengineDataset:
    """Build a vtlengine Dataset from a pysdmx PandasDataset.

    The Schema attached to the PandasDataset is handed to vtlengine, which
    translates it into a VTL data structure. The pandas DataFrame of the
    PandasDataset is then attached to the resulting vtlengine Dataset.

    Args:
        dataset: The PandasDataset to convert. Its ``structure`` must be a
            Schema.
        vtl_dataset_name: The name given to the vtlengine Dataset.

    Returns:
        The vtlengine Dataset registered under ``vtl_dataset_name``,
        carrying the data of the PandasDataset.

    Raises:
        Invalid: If the dataset structure is not a Schema object.
    """
    structure = dataset.structure
    if not isinstance(structure, Schema):
        raise Invalid(
            "Validation Error",
            "Dataset structure must be a Schema object for conversion to VTL",
        )

    frame = dataset.data

    # vtlengine converts the Schema to its JSON structure description and
    # loads it back as datasets; the second element of the result (scalars)
    # is not needed here.
    loaded_datasets, _ = load_datasets(
        to_vtl_json(structure, vtl_dataset_name)
    )
    converted = loaded_datasets[vtl_dataset_name]

    # Only the structure was loaded above, so attach the actual data.
    converted.data = frame
    return converted
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def convert_dataset_to_sdmx(
    dataset: VTLengineDataset,
    reference: Optional[Reference] = None,
    schema: Optional[Schema] = None,
) -> PandasDataset:
    """Wrap a vtlengine Dataset into a pysdmx PandasDataset.

    Two modes are supported:

    * With a ``schema``: the dataset is validated against the schema
      (component names, data types and roles) and the same schema is used
      as the structure of the returned PandasDataset.
    * Without a ``schema``: a ``reference`` is required. A new Schema is
      generated from the VTL components (mapping data types and roles to
      their SDMX equivalents), identified by the agency, id and version of
      the reference.

    Args:
        dataset: The vtlengine Dataset to convert. It must hold data.
        reference: Reference to the SDMX structure (DataStructure,
            Dataflow or ProvisionAgreement). Only used, and then
            mandatory, when no ``schema`` is given.
        schema: Schema describing the expected SDMX structure. When given,
            it takes precedence over ``reference``.

    Returns:
        A PandasDataset holding the dataset data together with either the
        supplied schema or the generated one.

    Raises:
        Invalid: If neither ``schema`` nor ``reference`` is provided, if
            the reference ``sdmx_type`` is not supported, if the dataset
            has no data, if a component type or role cannot be mapped, or
            if validation against the supplied schema fails.
    """
    if schema is not None:
        # Validation happens before the data check, so a structural
        # mismatch is reported even when the dataset is empty.
        _validate_vtl_dataset_against_schema(dataset, schema)

        frame = dataset.data
        if frame is None:
            raise Invalid(
                "Validation Error",
                "VTL dataset has no data for conversion to SDMX",
            )
        return PandasDataset(structure=schema, data=frame)

    if reference is None:
        raise Invalid(
            "Validation Error",
            "Either schema or reference must be provided",
        )

    ref_type = reference.sdmx_type
    if ref_type not in VALID_SDMX_TYPES:
        raise Invalid(
            "Validation Error",
            f"Reference sdmx_type must be one of {VALID_SDMX_TYPES}, "
            f"but got '{ref_type}'",
        )

    frame = dataset.data
    if frame is None:
        raise Invalid(
            "Validation Error",
            "VTL dataset has no data for conversion to SDMX",
        )

    # Derive one SDMX component per VTL component.
    derived_components = [
        _vtl_component_to_sdmx(name, vtl_component)
        for name, vtl_component in dataset.components.items()
    ]

    # The schema is identified with the coordinates of the reference.
    derived_schema = Schema(
        context=ref_type.lower(),  # type: ignore[arg-type]
        agency=reference.agency,
        id=reference.id,
        version=reference.version,
        components=Components(derived_components),
    )
    return PandasDataset(structure=derived_schema, data=frame)


def _vtl_component_to_sdmx(name, vtl_component):  # type: ignore[no-untyped-def]
    """Translate one VTL component into an SDMX Component called ``name``."""
    # The data type is mapped before the role, so an unmappable type is
    # reported first when both are invalid.
    dtype = _map_vtl_dtype_to_sdmx(vtl_component.data_type)
    role = _map_vtl_role_to_sdmx(vtl_component.role)

    # Attributes are attached at observation level ("O"); other roles have
    # no attachment level.
    if role == Role.ATTRIBUTE:
        level = "O"
    else:
        level = None

    return Component(
        id=name,
        required=not vtl_component.nullable,
        role=role,
        concept=Concept(name, dtype=dtype),
        attachment_level=level,
    )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _map_vtl_dtype_to_sdmx(
    vtl_dtype_value: Union[ScalarType, Type[ScalarType]],
) -> DataType:
    """Translate a VTL scalar type into the matching SDMX DataType.

    Args:
        vtl_dtype_value: Either a VTL scalar type class or an instance of
            one; instances are resolved to their class before the lookup.

    Returns:
        The SDMX DataType registered for the VTL scalar type.

    Raises:
        Invalid: If no SDMX DataType is registered for the VTL type.
    """
    # The mapping is keyed by class, so normalise instances to their class.
    dtype_cls = (
        vtl_dtype_value
        if isinstance(vtl_dtype_value, type)
        else type(vtl_dtype_value)
    )

    sdmx_dtype = VTL_TO_SDMX_TYPE_MAP.get(dtype_cls)
    if sdmx_dtype is not None:
        return sdmx_dtype

    supported = ", ".join(str(t.__name__) for t in VTL_TO_SDMX_TYPE_MAP)
    raise Invalid(
        "Validation Error",
        f"VTL DataType '{dtype_cls.__name__}' cannot be "
        f"mapped to an SDMX type. Supported types are: {supported}",
    )
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _map_vtl_role_to_sdmx(vtl_role: VTLRole) -> Role:
    """Translate a VTL Role into the matching SDMX Role.

    Args:
        vtl_role: The VTL role to translate.

    Returns:
        The SDMX Role registered for the VTL role.

    Raises:
        Invalid: If no SDMX Role is registered for the VTL role.
    """
    sdmx_role = VTL_TO_SDMX_ROLE_MAP.get(vtl_role)
    if sdmx_role is not None:
        return sdmx_role

    raise Invalid(
        "Validation Error",
        f"VTL Role '{vtl_role}' cannot be mapped to an SDMX Role",
    )
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _validate_vtl_dataset_against_schema(
    dataset: VTLengineDataset,
    schema: Schema,
) -> None:
    """Check that a vtlengine Dataset is consistent with an SDMX Schema.

    The check runs in two steps: first both sides must define exactly the
    same component names; then, for each schema component, the VTL data
    type and role (mapped to their SDMX equivalents) must equal the ones
    declared in the schema.

    Args:
        dataset: The vtlengine Dataset whose components are checked.
        schema: The SDMX Schema defining the expected components, data
            types and roles.

    Raises:
        Invalid: If the component names differ, if a VTL type or role
            cannot be mapped, or if a mapped type or role differs from
            the one in the schema.
    """
    vtl_names = set(dataset.components)
    schema_names = {comp.id for comp in schema.components}

    if vtl_names != schema_names:
        # Report both directions of the difference in a single message.
        only_in_vtl = vtl_names - schema_names
        only_in_schema = schema_names - vtl_names
        details = []
        if only_in_vtl:
            details.append(f"VTL components not in Schema: {only_in_vtl}")
        if only_in_schema:
            details.append(f"Schema components not in VTL: {only_in_schema}")
        raise Invalid(
            "Validation Error",
            "Component mismatch between VTL Dataset and Schema. "
            f"{'; '.join(details)}",
        )

    for declared in schema.components:
        name = str(declared.id)
        vtl_component = dataset.components[name]

        # Compare on the SDMX side: map the VTL type, then check it.
        mapped_dtype = _map_vtl_dtype_to_sdmx(vtl_component.data_type)
        if declared.dtype != mapped_dtype:
            raise Invalid(
                "Validation Error",
                "Component mismatch between VTL Dataset and Schema. "
                f"Component '{name}' has type {mapped_dtype} "
                f"in VTL but {declared.dtype} in Schema",
            )

        # Same approach for the role.
        mapped_role = _map_vtl_role_to_sdmx(vtl_component.role)
        if declared.role != mapped_role:
            raise Invalid(
                "Validation Error",
                "Component mismatch between VTL Dataset and Schema. "
                f"Component '{name}' has role {mapped_role} "
                f"in VTL but {declared.role} in Schema",
            )
|