pysdmx 1.8.1__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. pysdmx/__extras_check.py +15 -1
  2. pysdmx/__init__.py +1 -1
  3. pysdmx/api/fmr/__init__.py +3 -2
  4. pysdmx/io/input_processor.py +9 -6
  5. pysdmx/io/json/fusion/messages/category.py +69 -41
  6. pysdmx/io/json/sdmxjson2/messages/__init__.py +4 -0
  7. pysdmx/io/json/sdmxjson2/messages/category.py +76 -43
  8. pysdmx/io/json/sdmxjson2/messages/code.py +16 -6
  9. pysdmx/io/json/sdmxjson2/messages/constraint.py +235 -16
  10. pysdmx/io/json/sdmxjson2/messages/core.py +2 -1
  11. pysdmx/io/json/sdmxjson2/messages/dsd.py +35 -7
  12. pysdmx/io/json/sdmxjson2/messages/map.py +5 -4
  13. pysdmx/io/json/sdmxjson2/messages/metadataflow.py +1 -0
  14. pysdmx/io/json/sdmxjson2/messages/msd.py +18 -10
  15. pysdmx/io/json/sdmxjson2/messages/schema.py +2 -2
  16. pysdmx/io/json/sdmxjson2/messages/structure.py +81 -44
  17. pysdmx/io/json/sdmxjson2/messages/vtl.py +13 -9
  18. pysdmx/io/json/sdmxjson2/reader/doc_validation.py +112 -0
  19. pysdmx/io/json/sdmxjson2/reader/metadata.py +8 -1
  20. pysdmx/io/json/sdmxjson2/reader/structure.py +9 -2
  21. pysdmx/io/reader.py +18 -4
  22. pysdmx/io/xml/__data_aux.py +9 -4
  23. pysdmx/io/xml/__parse_xml.py +2 -0
  24. pysdmx/io/xml/__structure_aux_reader.py +70 -0
  25. pysdmx/io/xml/__structure_aux_writer.py +63 -9
  26. pysdmx/io/xml/__tokens.py +3 -0
  27. pysdmx/io/xml/__write_aux.py +35 -30
  28. pysdmx/io/xml/header.py +48 -35
  29. pysdmx/model/__base.py +47 -2
  30. pysdmx/model/__init__.py +18 -0
  31. pysdmx/model/category.py +23 -1
  32. pysdmx/model/constraint.py +69 -0
  33. pysdmx/model/message.py +97 -72
  34. pysdmx/toolkit/vtl/__init__.py +10 -1
  35. pysdmx/toolkit/vtl/_validations.py +8 -12
  36. pysdmx/toolkit/vtl/convert.py +333 -0
  37. pysdmx/toolkit/vtl/script_generation.py +1 -1
  38. pysdmx/util/_model_utils.py +40 -3
  39. {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/METADATA +6 -3
  40. {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/RECORD +42 -39
  41. {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/WHEEL +0 -0
  42. {pysdmx-1.8.1.dist-info → pysdmx-1.10.0.dist-info}/licenses/LICENSE +0 -0
pysdmx/model/message.py CHANGED
@@ -19,10 +19,11 @@ from typing import Any, Dict, List, Optional, Sequence, Type, Union
19
19
  from msgspec import Struct
20
20
 
21
21
  from pysdmx.errors import Invalid, NotFound
22
- from pysdmx.model.__base import ItemScheme, MaintainableArtefact, Organisation
22
+ from pysdmx.model.__base import MaintainableArtefact, Organisation
23
23
  from pysdmx.model.category import Categorisation, CategoryScheme
24
24
  from pysdmx.model.code import Codelist, Hierarchy, HierarchyAssociation
25
25
  from pysdmx.model.concept import ConceptScheme
26
+ from pysdmx.model.constraint import DataConstraint
26
27
  from pysdmx.model.dataflow import (
27
28
  Dataflow,
28
29
  DataStructureDefinition,
@@ -87,7 +88,7 @@ class Header(Struct, repr_omit_defaults=True, kw_only=True):
87
88
  test: bool = False
88
89
  prepared: datetime = datetime.now(timezone.utc)
89
90
  sender: Organisation = Organisation(id="ZZZ")
90
- receiver: Optional[Organisation] = None
91
+ receiver: Sequence[Organisation] = ()
91
92
  source: Optional[str] = None
92
93
  dataset_action: Optional[ActionType] = None
93
94
  structure: Optional[Dict[str, str]] = None
@@ -99,7 +100,13 @@ class Header(Struct, repr_omit_defaults=True, kw_only=True):
99
100
  self.sender = Organisation(id=self.sender)
100
101
 
101
102
  if isinstance(self.receiver, str):
102
- self.receiver = Organisation(id=self.receiver)
103
+ self.receiver = [Organisation(id=self.receiver)]
104
+
105
+ if isinstance(self.receiver, Organisation):
106
+ self.receiver = [self.receiver]
107
+
108
+ if self.receiver is None:
109
+ self.receiver = ()
103
110
 
104
111
  def __str__(self) -> str:
105
112
  """Custom string representation without the class name."""
@@ -171,7 +178,9 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
171
178
  attrs.append(f"{attr}={repr(value)}")
172
179
  return f"{self.__class__.__name__}({', '.join(attrs)})"
173
180
 
174
- def __get_elements(self, type_: Type[Any]) -> List[Any]:
181
+ # Returns MaintainableArtefacts only, but mypy complains.
182
+ # As it is an internal method, it's acceptable.
183
+ def __get_elements(self, type_: Type[MaintainableArtefact]) -> List[Any]:
175
184
  """Returns a list of elements of a specific type."""
176
185
  if self.structures is None:
177
186
  raise NotFound(
@@ -180,6 +189,8 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
180
189
  structures = [e for e in self.structures if isinstance(e, type_)]
181
190
  return structures
182
191
 
192
+ # Returns Codelist or ValueList only, but mypy complains.
193
+ # As it is an internal method, it's acceptable.
183
194
  def __get_enumerations(
184
195
  self, type_: Type[Any], is_vl: bool = False
185
196
  ) -> List[Any]:
@@ -188,9 +199,11 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
188
199
  t = "valuelist" if is_vl else "codelist"
189
200
  return [e for e in enums if e.sdmx_type == t]
190
201
 
202
+ # Returns MaintainableArtefacts only,
203
+ # but mypy complains. As it is an internal method, it's acceptable.
191
204
  def __get_single_structure(
192
205
  self,
193
- type_: Type[Union[ItemScheme, DataStructureDefinition, Dataflow]],
206
+ type_: Type[MaintainableArtefact],
194
207
  short_urn: str,
195
208
  ) -> Any:
196
209
  """Returns a specific element from content."""
@@ -208,10 +221,20 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
208
221
  "Could not find the requested element.",
209
222
  )
210
223
 
224
+ # Return collection of a certain type
225
+
211
226
  def get_agency_schemes(self) -> List[AgencyScheme]:
212
227
  """Returns the AgencySchemes."""
213
228
  return self.__get_elements(AgencyScheme)
214
229
 
230
+ def get_categorisations(self) -> List[Categorisation]:
231
+ """Returns the Categorisations."""
232
+ return self.__get_elements(Categorisation)
233
+
234
+ def get_category_schemes(self) -> List[CategoryScheme]:
235
+ """Returns the CategorySchemes."""
236
+ return self.__get_elements(CategoryScheme)
237
+
215
238
  def get_codelists(self) -> List[Codelist]:
216
239
  """Returns the Codelists."""
217
240
  return self.__get_enumerations(Codelist, False)
@@ -220,6 +243,10 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
220
243
  """Returns the Concept Schemes."""
221
244
  return self.__get_elements(ConceptScheme)
222
245
 
246
+ def get_custom_type_schemes(self) -> List[CustomTypeScheme]:
247
+ """Returns the CustomType Schemes."""
248
+ return self.__get_elements(CustomTypeScheme)
249
+
223
250
  def get_data_structure_definitions(
224
251
  self,
225
252
  ) -> List[DataStructureDefinition]:
@@ -230,53 +257,13 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
230
257
  """Returns the Dataflows."""
231
258
  return self.__get_elements(Dataflow)
232
259
 
233
- def get_metadataflows(self) -> List[Metadataflow]:
234
- """Returns the MetadataProvisionAgreements."""
235
- return self.__get_elements(Metadataflow)
236
-
237
- def get_organisation_scheme(self, short_urn: str) -> AgencyScheme:
238
- """Returns a specific OrganisationScheme."""
239
- return self.__get_single_structure(AgencyScheme, short_urn)
240
-
241
- def get_codelist(self, short_urn: str) -> Codelist:
242
- """Returns a specific Codelist."""
243
- return self.__get_single_structure(Codelist, short_urn)
244
-
245
- def get_concept_scheme(self, short_urn: str) -> ConceptScheme:
246
- """Returns a specific Concept Scheme."""
247
- return self.__get_single_structure(ConceptScheme, short_urn)
248
-
249
- def get_data_structure_definition(
250
- self, short_urn: str
251
- ) -> DataStructureDefinition:
252
- """Returns a specific DataStructureDefinition."""
253
- return self.__get_single_structure(DataStructureDefinition, short_urn)
260
+ def get_data_constraints(self) -> List[DataConstraint]:
261
+ """Returns the DataConstraints."""
262
+ return self.__get_elements(DataConstraint)
254
263
 
255
- def get_dataflow(self, short_urn: str) -> Dataflow:
256
- """Returns a specific Dataflow."""
257
- return self.__get_single_structure(Dataflow, short_urn)
258
-
259
- def get_transformation_schemes(self) -> List[TransformationScheme]:
260
- """Returns the TransformationSchemes."""
261
- return self.__get_elements(TransformationScheme)
262
-
263
- def get_user_defined_operator_schemes(
264
- self,
265
- ) -> List[UserDefinedOperatorScheme]:
266
- """Returns the UserDefinedOperatorSchemes."""
267
- return self.__get_elements(UserDefinedOperatorScheme)
268
-
269
- def get_ruleset_schemes(self) -> List[RulesetScheme]:
270
- """Returns the RulesetSchemes."""
271
- return self.__get_elements(RulesetScheme)
272
-
273
- def get_category_schemes(self) -> List[CategoryScheme]:
274
- """Returns the CategorySchemes."""
275
- return self.__get_elements(CategoryScheme)
276
-
277
- def get_value_lists(self) -> List[Codelist]:
278
- """Returns the Codelists."""
279
- return self.__get_enumerations(Codelist, True)
264
+ def get_data_provider_schemes(self) -> List[DataProviderScheme]:
265
+ """Returns the DataProviderSchemes."""
266
+ return self.__get_elements(DataProviderScheme)
280
267
 
281
268
  def get_hierarchies(self) -> List[Hierarchy]:
282
269
  """Returns the HierarchyCodelists."""
@@ -286,13 +273,9 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
286
273
  """Returns the HierarchyAssociations."""
287
274
  return self.__get_elements(HierarchyAssociation)
288
275
 
289
- def get_data_provider_schemes(self) -> List[DataProviderScheme]:
290
- """Returns the DataProviderSchemes."""
291
- return self.__get_elements(DataProviderScheme)
292
-
293
- def get_provision_agreements(self) -> List[ProvisionAgreement]:
294
- """Returns the ProvisionAgreements."""
295
- return self.__get_elements(ProvisionAgreement)
276
+ def get_metadataflows(self) -> List[Metadataflow]:
277
+ """Returns the MetadataProvisionAgreements."""
278
+ return self.__get_elements(Metadataflow)
296
279
 
297
280
  def get_metadata_provider_schemes(self) -> List[MetadataProviderScheme]:
298
281
  """Returns the MetadataProviderSchemes."""
@@ -308,9 +291,15 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
308
291
  """Returns the MetadataStructures."""
309
292
  return self.__get_elements(MetadataStructure)
310
293
 
311
- def get_structure_maps(self) -> List[StructureMap]:
312
- """Returns the StructureMaps."""
313
- return self.__get_elements(StructureMap)
294
+ def get_name_personalisation_schemes(
295
+ self,
296
+ ) -> List[NamePersonalisationScheme]:
297
+ """Returns the NamePersonalisationSchemes."""
298
+ return self.__get_elements(NamePersonalisationScheme)
299
+
300
+ def get_provision_agreements(self) -> List[ProvisionAgreement]:
301
+ """Returns the ProvisionAgreements."""
302
+ return self.__get_elements(ProvisionAgreement)
314
303
 
315
304
  def get_representation_maps(
316
305
  self,
@@ -321,23 +310,59 @@ class StructureMessage(Struct, repr_omit_defaults=True, frozen=True):
321
310
  out.extend(self.__get_elements(MultiRepresentationMap))
322
311
  return out
323
312
 
324
- def get_categorisations(self) -> List[Categorisation]:
325
- """Returns the Categorisations."""
326
- return self.__get_elements(Categorisation)
313
+ def get_ruleset_schemes(self) -> List[RulesetScheme]:
314
+ """Returns the RulesetSchemes."""
315
+ return self.__get_elements(RulesetScheme)
327
316
 
328
- def get_custom_type_schemes(self) -> List[CustomTypeScheme]:
329
- """Returns the CustomType Schemes."""
330
- return self.__get_elements(CustomTypeScheme)
317
+ def get_structure_maps(self) -> List[StructureMap]:
318
+ """Returns the StructureMaps."""
319
+ return self.__get_elements(StructureMap)
320
+
321
+ def get_transformation_schemes(self) -> List[TransformationScheme]:
322
+ """Returns the TransformationSchemes."""
323
+ return self.__get_elements(TransformationScheme)
324
+
325
+ def get_user_defined_operator_schemes(
326
+ self,
327
+ ) -> List[UserDefinedOperatorScheme]:
328
+ """Returns the UserDefinedOperatorSchemes."""
329
+ return self.__get_elements(UserDefinedOperatorScheme)
330
+
331
+ def get_value_lists(self) -> List[Codelist]:
332
+ """Returns the Codelists."""
333
+ return self.__get_enumerations(Codelist, True)
331
334
 
332
335
  def get_vtl_mapping_schemes(self) -> List[VtlMappingScheme]:
333
336
  """Returns the VTL Mapping Schemes."""
334
337
  return self.__get_elements(VtlMappingScheme)
335
338
 
336
- def get_name_personalisation_schemes(
337
- self,
338
- ) -> List[NamePersonalisationScheme]:
339
- """Returns the NamePersonalisationSchemes."""
340
- return self.__get_elements(NamePersonalisationScheme)
339
+ # Return individual items
340
+
341
+ def get_codelist(self, short_urn: str) -> Codelist:
342
+ """Returns a specific Codelist."""
343
+ return self.__get_single_structure(Codelist, short_urn)
344
+
345
+ def get_concept_scheme(self, short_urn: str) -> ConceptScheme:
346
+ """Returns a specific Concept Scheme."""
347
+ return self.__get_single_structure(ConceptScheme, short_urn)
348
+
349
+ def get_dataflow(self, short_urn: str) -> Dataflow:
350
+ """Returns a specific Dataflow."""
351
+ return self.__get_single_structure(Dataflow, short_urn)
352
+
353
+ def get_data_structure_definition(
354
+ self, short_urn: str
355
+ ) -> DataStructureDefinition:
356
+ """Returns a specific DataStructureDefinition."""
357
+ return self.__get_single_structure(DataStructureDefinition, short_urn)
358
+
359
+ def get_organisation_scheme(self, short_urn: str) -> AgencyScheme:
360
+ """Returns a specific AgencyScheme."""
361
+ return self.__get_single_structure(AgencyScheme, short_urn)
362
+
363
+ def get_provision_agreement(self, short_urn: str) -> ProvisionAgreement:
364
+ """Returns a specific Provision Agreement."""
365
+ return self.__get_single_structure(ProvisionAgreement, short_urn)
341
366
 
342
367
 
343
368
  class MetadataMessage(Struct, frozen=True):
@@ -1,6 +1,15 @@
1
1
  """VTL toolkit for PySDMX."""
2
2
 
3
+ from pysdmx.toolkit.vtl.convert import (
4
+ convert_dataset_to_sdmx,
5
+ convert_dataset_to_vtl,
6
+ )
3
7
  from pysdmx.toolkit.vtl.script_generation import generate_vtl_script
4
8
  from pysdmx.toolkit.vtl.validation import model_validations
5
9
 
6
- __all__ = ["model_validations", "generate_vtl_script"]
10
+ __all__ = [
11
+ "model_validations",
12
+ "generate_vtl_script",
13
+ "convert_dataset_to_vtl",
14
+ "convert_dataset_to_sdmx",
15
+ ]
@@ -1,13 +1,9 @@
1
1
  """Private module for VTL validation functions."""
2
2
 
3
- from vtlengine.API import create_ast # type: ignore[import-untyped]
4
- from vtlengine.AST import ( # type: ignore[import-untyped]
5
- DPRuleset as ASTDPRuleset,
6
- )
3
+ from vtlengine.API import create_ast
4
+ from vtlengine.AST import DPRuleset as ASTDPRuleset
7
5
  from vtlengine.AST import HRuleset as ASTHRuleset
8
- from vtlengine.AST import (
9
- Operator as ASTOperator,
10
- )
6
+ from vtlengine.AST import Operator as ASTOperator
11
7
 
12
8
  from pysdmx.errors import Invalid
13
9
  from pysdmx.model import Reference
@@ -37,14 +33,14 @@ def _ruleset_validation(ruleset: Ruleset) -> None:
37
33
  ast.children[0], ASTDPRuleset
38
34
  ):
39
35
  raise Invalid("Ruleset type does not match the definition")
40
- if (
41
- ruleset.ruleset_scope == "variable"
42
- and ast.children[0].signature_type != "variable"
43
- ):
36
+
37
+ child = ast.children[0]
38
+ signature_type = getattr(child, "signature_type", None)
39
+ if ruleset.ruleset_scope == "variable" and signature_type != "variable":
44
40
  raise Invalid("Ruleset scope does not match the definition")
45
41
  if (
46
42
  ruleset.ruleset_scope == "valuedomain"
47
- and ast.children[0].signature_type != "valuedomain"
43
+ and signature_type != "valuedomain"
48
44
  ):
49
45
  raise Invalid("Ruleset scope does not match the definition")
50
46
 
@@ -0,0 +1,333 @@
1
+ """Conversions between pysdmx PandasDataset and vtlengine Dataset."""
2
+
3
+ from typing import Dict, Optional, Type, Union
4
+
5
+ from vtlengine.API import load_datasets # type: ignore[attr-defined]
6
+ from vtlengine.API._InternalApi import to_vtl_json
7
+ from vtlengine.DataTypes import (
8
+ Boolean,
9
+ Date,
10
+ Duration,
11
+ Integer,
12
+ Number,
13
+ ScalarType,
14
+ String,
15
+ TimeInterval,
16
+ TimePeriod,
17
+ )
18
+ from vtlengine.Model import Dataset as VTLengineDataset
19
+ from vtlengine.Model import Role as VTLRole
20
+
21
+ from pysdmx.errors import Invalid
22
+ from pysdmx.io.pd import PandasDataset
23
+ from pysdmx.model import Component, Components, Concept, Reference
24
+ from pysdmx.model.concept import DataType
25
+ from pysdmx.model.dataflow import Role, Schema
26
+
27
+ # VTL to SDMX type mapping
28
+ VTL_TO_SDMX_TYPE_MAP: Dict[Type[ScalarType], DataType] = {
29
+ String: DataType.STRING,
30
+ Integer: DataType.INTEGER,
31
+ Number: DataType.DOUBLE,
32
+ Boolean: DataType.BOOLEAN,
33
+ Date: DataType.DATE,
34
+ TimePeriod: DataType.PERIOD,
35
+ TimeInterval: DataType.TIME,
36
+ Duration: DataType.DURATION,
37
+ }
38
+
39
+ # Role mapping
40
+ # ViralAttribute is not yet supported as a separate role in VTL 1.2.2,
41
+ # so it is mapped to Attribute following vtlengine's behavior
42
+ VTL_TO_SDMX_ROLE_MAP: Dict[VTLRole, Role] = {
43
+ VTLRole.IDENTIFIER: Role.DIMENSION,
44
+ VTLRole.MEASURE: Role.MEASURE,
45
+ VTLRole.ATTRIBUTE: Role.ATTRIBUTE,
46
+ "ViralAttribute": Role.ATTRIBUTE, # type: ignore[dict-item]
47
+ }
48
+
49
+ VALID_SDMX_TYPES = {"DataStructure", "Dataflow", "ProvisionAgreement"}
50
+
51
+
52
+ def convert_dataset_to_vtl(
53
+ dataset: PandasDataset, vtl_dataset_name: str
54
+ ) -> VTLengineDataset:
55
+ """Convert a PandasDataset to a vtlengine Dataset.
56
+
57
+ This function converts a PandasDataset, which contains both data and
58
+ structure (Schema), into a vtlengine Dataset. It uses vtlengine's
59
+ conversion functions to handle the Schema to VTL structure mapping.
60
+
61
+ It raises an Invalid exception if the dataset structure is not a
62
+ Schema object.
63
+
64
+ Args:
65
+ dataset: The PandasDataset to convert.
66
+ vtl_dataset_name: The name for the vtlengine Dataset.
67
+
68
+ Returns:
69
+ A vtlengine Dataset with the data and structure from the
70
+ PandasDataset.
71
+
72
+ Raises:
73
+ Invalid: If the dataset structure is not a Schema object or if
74
+ component types cannot be mapped.
75
+ """
76
+ if not isinstance(dataset.structure, Schema):
77
+ raise Invalid(
78
+ "Validation Error",
79
+ "Dataset structure must be a Schema object for conversion to VTL",
80
+ )
81
+
82
+ schema = dataset.structure
83
+ pd_dataset = dataset.data
84
+
85
+ # Use vtlengine's built-in conversion function to convert Schema to VTL
86
+ vtl_json = to_vtl_json(schema, vtl_dataset_name)
87
+
88
+ # Load the dataset structure using vtlengine's API
89
+ datasets, scalars = load_datasets(vtl_json)
90
+ vtl_dataset = datasets[vtl_dataset_name]
91
+
92
+ # Assign the pandas DataFrame to the VTL dataset
93
+ vtl_dataset.data = pd_dataset
94
+
95
+ return vtl_dataset
96
+
97
+
98
+ def convert_dataset_to_sdmx(
99
+ dataset: VTLengineDataset,
100
+ reference: Optional[Reference] = None,
101
+ schema: Optional[Schema] = None,
102
+ ) -> PandasDataset:
103
+ """Convert a VTLengine Dataset to a PandasDataset.
104
+
105
+ This function converts a `vtlengine.Model.Dataset` into
106
+ a `PandasDataset` by:
107
+
108
+ * Using a provided `Schema` for direct validation and conversion.
109
+ * Generating a new SDMX-compatible `Schema` from the dataset components,
110
+ using metadata from a provided `Reference`.
111
+
112
+ When a `schema` is supplied, the dataset is first validated against it and,
113
+ if validation passes, the data is wrapped in a `PandasDataset` with that
114
+ schema. If no `schema` is provided, a `reference` must be given so a new
115
+ SDMX structure (with components, roles, and data types mapped from the
116
+ VTL dataset) can be created.
117
+
118
+ Invalid is raised in the following cases:
119
+ * If neither `schema` nor `reference` is provided.
120
+ * If the `reference` has an unsupported `sdmx_type`.
121
+ * If the `dataset` contains no data.
122
+ * If component types or roles cannot be mapped to SDMX equivalents.
123
+ * If validation fails when a `schema` is provided.
124
+
125
+ Args:
126
+ dataset: The VTLengine dataset to convert.
127
+ Must include components and associated data.
128
+ reference: Optional reference to the SDMX structure
129
+ (DataStructure, Dataflow, or ProvisionAgreement).
130
+ Required only when no `schema` is provided.
131
+ Used to build a schema and supply contextual identifiers.
132
+ schema: Optional schema describing the SDMX structure.
133
+ If provided, the dataset is validated against it
134
+ and the same schema is used directly in the output.
135
+
136
+ Returns:
137
+ A `PandasDataset` containing the converted data and the associated SDMX
138
+ structure (either the provided schema or a generated one).
139
+
140
+ Raises:
141
+ Invalid: If the reference sdmx_type is not valid, if component types
142
+ cannot be mapped, or if validation fails when schema is provided.
143
+ """
144
+ # If schema is provided
145
+ if schema is not None:
146
+ _validate_vtl_dataset_against_schema(dataset, schema)
147
+
148
+ data = dataset.data
149
+ if data is None:
150
+ raise Invalid(
151
+ "Validation Error",
152
+ "VTL dataset has no data for conversion to SDMX",
153
+ )
154
+
155
+ pandas_dataset = PandasDataset(
156
+ structure=schema,
157
+ data=data,
158
+ )
159
+ return pandas_dataset
160
+
161
+ # If schema is not provided, reference must be provided
162
+ if reference is None:
163
+ raise Invalid(
164
+ "Validation Error",
165
+ "Either schema or reference must be provided",
166
+ )
167
+
168
+ # Validate reference.sdmx_type
169
+ if reference.sdmx_type not in VALID_SDMX_TYPES:
170
+ raise Invalid(
171
+ "Validation Error",
172
+ f"Reference sdmx_type must be one of {VALID_SDMX_TYPES}, "
173
+ f"but got '{reference.sdmx_type}'",
174
+ )
175
+
176
+ data = dataset.data
177
+ if data is None:
178
+ raise Invalid(
179
+ "Validation Error",
180
+ "VTL dataset has no data for conversion to SDMX",
181
+ )
182
+
183
+ # Generate a new Schema from VTL Dataset components
184
+ sdmx_components = []
185
+
186
+ for comp_name, vtl_comp in dataset.components.items():
187
+ # Map VTL data type to SDMX data type
188
+ sdmx_dtype = _map_vtl_dtype_to_sdmx(vtl_comp.data_type)
189
+
190
+ # Map VTL role to SDMX role
191
+ sdmx_role = _map_vtl_role_to_sdmx(vtl_comp.role)
192
+
193
+ # Determine attachment_level for attributes
194
+ attachment_level = "O" if sdmx_role == Role.ATTRIBUTE else None
195
+
196
+ # Create SDMX Component
197
+ sdmx_comp = Component(
198
+ id=comp_name,
199
+ required=not vtl_comp.nullable,
200
+ role=sdmx_role,
201
+ concept=Concept(comp_name, dtype=sdmx_dtype),
202
+ attachment_level=attachment_level,
203
+ )
204
+ sdmx_components.append(sdmx_comp)
205
+
206
+ # Create Schema using reference information
207
+ generated_schema = Schema(
208
+ context=reference.sdmx_type.lower(), # type: ignore[arg-type]
209
+ agency=reference.agency,
210
+ id=reference.id,
211
+ version=reference.version,
212
+ components=Components(sdmx_components),
213
+ )
214
+
215
+ pandas_dataset = PandasDataset(
216
+ structure=generated_schema,
217
+ data=data,
218
+ )
219
+ return pandas_dataset
220
+
221
+
222
+ def _map_vtl_dtype_to_sdmx(
223
+ vtl_dtype_value: Union[ScalarType, Type[ScalarType]],
224
+ ) -> DataType:
225
+ """Return the SDMX DataType for a given VTL scalar.
226
+
227
+ Args:
228
+ vtl_dtype_value: The VTL scalar type or instance to map.
229
+
230
+ Returns:
231
+ The corresponding SDMX DataType.
232
+
233
+ Raises:
234
+ Invalid: If the VTL DataType cannot be mapped to an SDMX DataType.
235
+ """
236
+ if isinstance(vtl_dtype_value, type):
237
+ vtl_dtype_class: type[ScalarType] = vtl_dtype_value
238
+ else:
239
+ vtl_dtype_class = type(vtl_dtype_value)
240
+
241
+ if vtl_dtype_class not in VTL_TO_SDMX_TYPE_MAP:
242
+ supported = ", ".join(str(t.__name__) for t in VTL_TO_SDMX_TYPE_MAP)
243
+ raise Invalid(
244
+ "Validation Error",
245
+ f"VTL DataType '{vtl_dtype_class.__name__}' cannot be "
246
+ f"mapped to an SDMX type. Supported types are: {supported}",
247
+ )
248
+
249
+ return VTL_TO_SDMX_TYPE_MAP[vtl_dtype_class]
250
+
251
+
252
+ def _map_vtl_role_to_sdmx(vtl_role: VTLRole) -> Role:
253
+ """Return the SDMX Role for a given VTL Role.
254
+
255
+ Args:
256
+ vtl_role: The VTLRole to map.
257
+
258
+ Returns:
259
+ The corresponding SDMX Role.
260
+
261
+ Raises:
262
+ Invalid: If the VTL Role cannot be mapped to an SDMX Role.
263
+ """
264
+ if vtl_role not in VTL_TO_SDMX_ROLE_MAP:
265
+ raise Invalid(
266
+ "Validation Error",
267
+ f"VTL Role '{vtl_role}' cannot be mapped to an SDMX Role",
268
+ )
269
+ return VTL_TO_SDMX_ROLE_MAP[vtl_role]
270
+
271
+
272
+ def _validate_vtl_dataset_against_schema(
273
+ dataset: VTLengineDataset,
274
+ schema: Schema,
275
+ ) -> None:
276
+ """Validate VTLengine Dataset against SDMX Schema.
277
+
278
+ Args:
279
+ dataset: The VTLengineDataset instance whose components, roles, and
280
+ data types will be validated.
281
+ schema: The SDMX Schema that defines the expected components,
282
+ SDMX data types, and SDMX roles for validation.
283
+
284
+ Raises:
285
+ Invalid: If component names differ, if types or roles cannot be mapped,
286
+ or if any mismatch is detected between the Dataset and Schema.
287
+ """
288
+ # Validate that schema components match VTL dataset components
289
+ vtl_component_names = set(dataset.components.keys())
290
+ schema_component_names = {comp.id for comp in schema.components}
291
+
292
+ if vtl_component_names != schema_component_names:
293
+ missing_in_schema = vtl_component_names - schema_component_names
294
+ missing_in_vtl = schema_component_names - vtl_component_names
295
+ error_parts = []
296
+ if missing_in_schema:
297
+ error_parts.append(
298
+ f"VTL components not in Schema: {missing_in_schema}"
299
+ )
300
+ if missing_in_vtl:
301
+ error_parts.append(
302
+ f"Schema components not in VTL: {missing_in_vtl}"
303
+ )
304
+ raise Invalid(
305
+ "Validation Error",
306
+ "Component mismatch between VTL Dataset and Schema. "
307
+ f"{'; '.join(error_parts)}",
308
+ )
309
+
310
+ # Validate that component types and roles match
311
+ for component in schema.components:
312
+ comp_id = str(component.id)
313
+ vtl_comp = dataset.components[comp_id]
314
+
315
+ # Validate data type using helper
316
+ expected_sdmx_dtype = _map_vtl_dtype_to_sdmx(vtl_comp.data_type)
317
+ if component.dtype != expected_sdmx_dtype:
318
+ raise Invalid(
319
+ "Validation Error",
320
+ "Component mismatch between VTL Dataset and Schema. "
321
+ f"Component '{comp_id}' has type {expected_sdmx_dtype} "
322
+ f"in VTL but {component.dtype} in Schema",
323
+ )
324
+
325
+ # Validate role using helper
326
+ expected_sdmx_role = _map_vtl_role_to_sdmx(vtl_comp.role)
327
+ if component.role != expected_sdmx_role:
328
+ raise Invalid(
329
+ "Validation Error",
330
+ "Component mismatch between VTL Dataset and Schema. "
331
+ f"Component '{comp_id}' has role {expected_sdmx_role} "
332
+ f"in VTL but {component.role} in Schema",
333
+ )
@@ -97,7 +97,7 @@ def generate_vtl_script(
97
97
 
98
98
  if prettyprint:
99
99
  __check_vtl_extra()
100
- from vtlengine import prettify # type: ignore[import-untyped]
100
+ from vtlengine import prettify
101
101
 
102
102
  return prettify(vtl_script)
103
103