pysdmx 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. pysdmx/__init__.py +1 -1
  2. pysdmx/api/fmr/__init__.py +8 -3
  3. pysdmx/api/fmr/maintenance.py +158 -0
  4. pysdmx/api/qb/structure.py +1 -0
  5. pysdmx/api/qb/util.py +1 -0
  6. pysdmx/io/csv/__csv_aux_reader.py +99 -0
  7. pysdmx/io/csv/__csv_aux_writer.py +118 -0
  8. pysdmx/io/csv/sdmx10/reader/__init__.py +9 -14
  9. pysdmx/io/csv/sdmx10/writer/__init__.py +28 -2
  10. pysdmx/io/csv/sdmx20/__init__.py +0 -9
  11. pysdmx/io/csv/sdmx20/reader/__init__.py +8 -61
  12. pysdmx/io/csv/sdmx20/writer/__init__.py +32 -25
  13. pysdmx/io/csv/sdmx21/__init__.py +1 -0
  14. pysdmx/io/csv/sdmx21/reader/__init__.py +86 -0
  15. pysdmx/io/csv/sdmx21/writer/__init__.py +70 -0
  16. pysdmx/io/format.py +8 -0
  17. pysdmx/io/input_processor.py +16 -2
  18. pysdmx/io/json/fusion/messages/code.py +21 -4
  19. pysdmx/io/json/fusion/messages/concept.py +16 -8
  20. pysdmx/io/json/fusion/messages/dataflow.py +8 -1
  21. pysdmx/io/json/fusion/messages/dsd.py +15 -0
  22. pysdmx/io/json/fusion/messages/schema.py +8 -1
  23. pysdmx/io/json/sdmxjson2/messages/agency.py +43 -7
  24. pysdmx/io/json/sdmxjson2/messages/category.py +92 -7
  25. pysdmx/io/json/sdmxjson2/messages/code.py +239 -18
  26. pysdmx/io/json/sdmxjson2/messages/concept.py +78 -13
  27. pysdmx/io/json/sdmxjson2/messages/constraint.py +5 -5
  28. pysdmx/io/json/sdmxjson2/messages/core.py +121 -14
  29. pysdmx/io/json/sdmxjson2/messages/dataflow.py +63 -8
  30. pysdmx/io/json/sdmxjson2/messages/dsd.py +215 -20
  31. pysdmx/io/json/sdmxjson2/messages/map.py +200 -24
  32. pysdmx/io/json/sdmxjson2/messages/pa.py +36 -5
  33. pysdmx/io/json/sdmxjson2/messages/provider.py +35 -7
  34. pysdmx/io/json/sdmxjson2/messages/report.py +85 -7
  35. pysdmx/io/json/sdmxjson2/messages/schema.py +11 -12
  36. pysdmx/io/json/sdmxjson2/messages/structure.py +150 -2
  37. pysdmx/io/json/sdmxjson2/messages/vtl.py +547 -17
  38. pysdmx/io/json/sdmxjson2/reader/metadata.py +32 -0
  39. pysdmx/io/json/sdmxjson2/reader/structure.py +32 -0
  40. pysdmx/io/json/sdmxjson2/writer/__init__.py +9 -0
  41. pysdmx/io/json/sdmxjson2/writer/metadata.py +60 -0
  42. pysdmx/io/json/sdmxjson2/writer/structure.py +61 -0
  43. pysdmx/io/reader.py +28 -9
  44. pysdmx/io/serde.py +17 -0
  45. pysdmx/io/writer.py +45 -9
  46. pysdmx/io/xml/__write_data_aux.py +1 -54
  47. pysdmx/io/xml/__write_structure_specific_aux.py +1 -1
  48. pysdmx/io/xml/sdmx21/writer/generic.py +1 -1
  49. pysdmx/model/code.py +11 -1
  50. pysdmx/model/dataflow.py +23 -0
  51. pysdmx/model/map.py +12 -4
  52. pysdmx/model/message.py +9 -1
  53. pysdmx/toolkit/pd/_data_utils.py +100 -0
  54. pysdmx/toolkit/vtl/_validations.py +2 -3
  55. {pysdmx-1.5.2.dist-info → pysdmx-1.6.0.dist-info}/METADATA +3 -2
  56. {pysdmx-1.5.2.dist-info → pysdmx-1.6.0.dist-info}/RECORD +58 -46
  57. {pysdmx-1.5.2.dist-info → pysdmx-1.6.0.dist-info}/WHEEL +1 -1
  58. {pysdmx-1.5.2.dist-info → pysdmx-1.6.0.dist-info/licenses}/LICENSE +0 -0
pysdmx/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Your opinionated Python SDMX library."""
2
2
 
3
- __version__ = "1.5.2"
3
+ __version__ = "1.6.0"
@@ -571,13 +571,16 @@ class RegistryClient(__BaseRegistryClient):
571
571
  d = DataflowDetails(detail) if isinstance(detail, str) else detail
572
572
  sq, dr = super()._df_details(d)
573
573
  if sq:
574
- cmps = self.get_schema("dataflow", agency, id, version).components
574
+ schema = self.get_schema("dataflow", agency, id, version)
575
+ cmps = schema.components
576
+ grps = schema.groups
575
577
  else:
576
578
  cmps = None
579
+ grps = None
577
580
  query = super()._dataflow_details_q(agency, id, version, dr)
578
581
  out = self.__fetch(query)
579
582
  return super()._out(
580
- out, self.deser.dataflow_info, cmps, agency, id, version
583
+ out, self.deser.dataflow_info, cmps, grps, agency, id, version
581
584
  )
582
585
 
583
586
  def get_dataflows(
@@ -1034,12 +1037,14 @@ class AsyncRegistryClient(__BaseRegistryClient):
1034
1037
  version,
1035
1038
  )
1036
1039
  cmps = schema.components
1040
+ grps = schema.groups
1037
1041
  else:
1038
1042
  cmps = None
1043
+ grps = None
1039
1044
  query = super()._dataflow_details_q(agency, id, version, dr)
1040
1045
  out = await self.__fetch(query)
1041
1046
  return super()._out(
1042
- out, self.deser.dataflow_info, cmps, agency, id, version
1047
+ out, self.deser.dataflow_info, cmps, grps, agency, id, version
1043
1048
  )
1044
1049
 
1045
1050
  async def get_dataflows(
@@ -0,0 +1,158 @@
1
+ """Upload metadata to an FMR instance."""
2
+
3
+ from enum import Enum
4
+ from typing import Optional, Sequence, Union
5
+
6
+ import httpx
7
+ import msgspec
8
+
9
+ from pysdmx.io.json.sdmxjson2.writer import serializers
10
+ from pysdmx.model import MetadataReport
11
+ from pysdmx.model.__base import MaintainableArtefact
12
+ from pysdmx.model.message import (
13
+ Header,
14
+ MetadataMessage,
15
+ StructureMessage,
16
+ )
17
+ from pysdmx.util._net_utils import map_httpx_errors
18
+
19
+
20
+ class StructureAction(Enum):
21
+ """Enumeration that defines the action when updating metadata in the FMR.
22
+
23
+ Arguments:
24
+ Append: Metadata uploaded with action 'Append' may only add new
25
+ metadata and may not overwrite any existing metadata, i.e. any
26
+ attempt to update existing metadata will be rejected.
27
+ Merge: Metadata uploaded with action 'Merge' may add new metadata and
28
+ replace existing metadata. However, for Item Schemes (codelists,
29
+ concept schemes, etc.), the items submitted will be added to the
30
+ existing scheme. For example, if a codelist exists with codes A, B,
31
+ and C, and the same codelist is submitted with codes B and X, then
32
+ the resulting codelist will have codes A, B, C, X, i.e. code B has
33
+ been replaced while code X has been added.
34
+ Replace: Metadata uploaded with action 'Replace' may add new metadata,
35
+ and can also replace existing metadata with new ones. This is the
36
+ default.
37
+ """
38
+
39
+ Append = "Append"
40
+ Merge = "Merge"
41
+ Replace = "Replace"
42
+
43
+
44
+ class RegistryMaintenanceClient:
45
+ """EXPERIMENTAL: A client to update metadata in the FMR."""
46
+
47
+ def __init__(
48
+ self,
49
+ api_endpoint: str,
50
+ user: str,
51
+ password: str,
52
+ pem: Optional[str] = None,
53
+ timeout: float = 60.0,
54
+ ):
55
+ """Instantiate a new client to update metadata in the target endpoint.
56
+
57
+ Args:
58
+ api_endpoint: The endpoint of the targeted service.
59
+ user: Username for authentication.
60
+ password: Password for authentication.
61
+ pem: In case the service exposed a certificate created by an
62
+ unknown certificate authority, you can pass a pem file for
63
+ this authority using this parameter.
64
+ timeout: The maximum number of seconds to wait before considering
65
+ that a request timed out. Defaults to 60 seconds.
66
+ """
67
+ if api_endpoint.endswith("/"):
68
+ api_endpoint = api_endpoint[0:-1]
69
+ self._api_endpoint = f"{api_endpoint}"
70
+ self._user = user
71
+ self._password = password
72
+ self._timeout = timeout
73
+ self._ssl_context = (
74
+ httpx.create_ssl_context(
75
+ verify=pem,
76
+ )
77
+ if pem
78
+ else httpx.create_ssl_context()
79
+ )
80
+ self._encoder = msgspec.json.Encoder()
81
+
82
+ def __post(
83
+ self,
84
+ message: Union[MetadataMessage, StructureMessage],
85
+ action: StructureAction,
86
+ endpoint: str,
87
+ ) -> None:
88
+ with httpx.Client(verify=self._ssl_context) as client:
89
+ try:
90
+ url = f"{endpoint}"
91
+ auth = httpx.BasicAuth(self._user, self._password)
92
+ headers = {
93
+ "Content-Type": "application/text",
94
+ "Action": action.value,
95
+ }
96
+ if isinstance(message, MetadataMessage):
97
+ serializer = serializers.metadata_message
98
+ else:
99
+ serializer = serializers.structure_message
100
+ bodyjs = self._encoder.encode(serializer.from_model(message))
101
+ r = client.post(
102
+ url,
103
+ headers=headers,
104
+ content=bodyjs,
105
+ timeout=self._timeout,
106
+ auth=auth,
107
+ )
108
+ r.raise_for_status()
109
+ except (httpx.RequestError, httpx.HTTPStatusError) as e:
110
+ map_httpx_errors(e)
111
+
112
+ def put_structures(
113
+ self,
114
+ artefacts: Sequence[MaintainableArtefact],
115
+ header: Optional[Header] = None,
116
+ action: StructureAction = StructureAction.Replace,
117
+ ) -> None:
118
+ """EXPERIMENTAL: Upload SDMX structures to the FMR.
119
+
120
+ This method is experimental and its interface or behavior may change
121
+ without notice.
122
+
123
+ Args:
124
+ artefacts: The sequence of SDMX maintainable artefacts to upload.
125
+ header: Optional SDMX Header to include in the message. If not
126
+ supplied, pysdmx will generate one for you.
127
+ action: How to apply the changes in case of already existing
128
+ structures.
129
+ """
130
+ if not header:
131
+ header = Header()
132
+ message = StructureMessage(header=header, structures=artefacts)
133
+ endpoint = f"{self._api_endpoint}/ws/secure/sdmxapi/rest"
134
+ return self.__post(message, action, endpoint)
135
+
136
+ def put_metadata_reports(
137
+ self,
138
+ reports: Sequence[MetadataReport],
139
+ header: Optional[Header] = None,
140
+ action: StructureAction = StructureAction.Replace,
141
+ ) -> None:
142
+ """EXPERIMENTAL: Upload SDMX metadata reports to the FMR.
143
+
144
+ This method is experimental and its interface or behavior may change
145
+ without notice.
146
+
147
+ Args:
148
+ reports: A sequence of metadata reports to upload.
149
+ header: Optional SDMX Header to include in the message. If not
150
+ supplied, pysdmx will generate one for you.
151
+ action: How to apply the changes in case of already existing
152
+ structures.
153
+ """
154
+ if not header:
155
+ header = Header()
156
+ message = MetadataMessage(header=header, reports=reports)
157
+ endpoint = f"{self._api_endpoint}/ws/secure/sdmx/v2/metadata"
158
+ return self.__post(message, action, endpoint)
@@ -273,6 +273,7 @@ _API_RESOURCES = {
273
273
  "V2.1.0": _V2_0_RESOURCES,
274
274
  "V2.2.0": _V2_0_RESOURCES,
275
275
  "V2.2.1": _V2_0_RESOURCES,
276
+ "V2.2.2": _V2_0_RESOURCES,
276
277
  "LATEST": _V2_0_RESOURCES,
277
278
  }
278
279
 
pysdmx/api/qb/util.py CHANGED
@@ -25,6 +25,7 @@ class ApiVersion(IntEnum):
25
25
  V2_1_0 = 9
26
26
  V2_2_0 = 10
27
27
  V2_2_1 = 11
28
+ V2_2_2 = 12
28
29
 
29
30
 
30
31
  MULT_SEP = re.compile(r"\+")
@@ -0,0 +1,99 @@
1
+ import pandas as pd
2
+
3
+ from pysdmx.errors import Invalid
4
+ from pysdmx.io.pd import PandasDataset
5
+ from pysdmx.model.dataset import ActionType
6
+
7
+ ACTION_SDMX_CSV_MAPPER_READING = {
8
+ "A": ActionType.Append,
9
+ "D": ActionType.Delete,
10
+ "R": ActionType.Replace,
11
+ "I": ActionType.Information,
12
+ }
13
+
14
+
15
+ def __generate_dataset_from_sdmx_csv( # noqa: C901
16
+ data: pd.DataFrame,
17
+ references_21: bool = False,
18
+ ) -> PandasDataset:
19
+ urn = ""
20
+ df_csv = pd.DataFrame()
21
+ action = None
22
+
23
+ if {"STRUCTURE", "STRUCTURE_ID"}.issubset(data.columns):
24
+ action = ActionType.Information
25
+ if "ACTION" in data.columns:
26
+ unique_values = list(data["ACTION"].unique())
27
+ if len(unique_values) > 1 and "D" in unique_values:
28
+ unique_values.remove("D")
29
+ data = data[data["ACTION"] != "D"]
30
+ if len(unique_values) == 1: # If there is only one value, use it
31
+ action_value = unique_values[0]
32
+ if action_value == "M":
33
+ if not references_21:
34
+ raise Invalid(
35
+ "Invalid value on ACTION column",
36
+ "Value 'M' is only allowed for"
37
+ " SDMX-CSV 2.1 files.",
38
+ )
39
+ action = ActionType.Append
40
+ elif action_value in ACTION_SDMX_CSV_MAPPER_READING:
41
+ action = ACTION_SDMX_CSV_MAPPER_READING[action_value]
42
+ else:
43
+ raise Invalid(
44
+ "Invalid value on ACTION column",
45
+ "Invalid SDMX-CSV file. "
46
+ "Check the docs for the proper values "
47
+ "on ACTION column.",
48
+ )
49
+
50
+ del data["ACTION"] # Remove ACTION column from DataFrame
51
+ else:
52
+ raise Invalid(
53
+ "Invalid value on ACTION column",
54
+ "Invalid SDMX-CSV file. "
55
+ "Cannot have more than one value on ACTION column, "
56
+ "or 2 if D is present",
57
+ )
58
+ # Remove columns that are not needed
59
+ if "STRUCTURE_NAME" in data.columns:
60
+ data = data.drop(columns=["STRUCTURE_NAME"])
61
+ if "SERIES_KEYS" in data.columns:
62
+ data = data.drop(columns=["SERIES_KEYS"])
63
+ if "OBS_KEYS" in data.columns:
64
+ data = data.drop(columns=["OBS_KEYS"])
65
+
66
+ # For SDMX-CSV version 2, use 'STRUCTURE_ID'
67
+ # column as the structure id and 'STRUCTURE' as the structure type
68
+ structure_id = data["STRUCTURE_ID"].iloc[0]
69
+ structure_type = data["STRUCTURE"].iloc[0]
70
+ # Drop 'STRUCTURE' and 'STRUCTURE_ID' columns from DataFrame
71
+ df_csv = data.drop(["STRUCTURE", "STRUCTURE_ID"], axis=1)
72
+ if structure_id.count(":") == 2:
73
+ structure_id = ":".join(structure_id.split(":")[:2])
74
+ if structure_type == "DataStructure".lower():
75
+ urn = f"DataStructure={structure_id}"
76
+ elif structure_type == "Dataflow".lower():
77
+ urn = f"Dataflow={structure_id}"
78
+ elif structure_type == "dataprovision":
79
+ urn = f"ProvisionAgreement={structure_id}"
80
+ else:
81
+ raise Invalid(
82
+ "Invalid value on STRUCTURE column",
83
+ "Invalid SDMX-CSV file. "
84
+ "Check the docs for the proper values on STRUCTURE column.",
85
+ )
86
+ else:
87
+ # For SDMX-CSV version 1, use 'DATAFLOW' column as the structure id
88
+ structure_id = data["DATAFLOW"].iloc[0]
89
+ if structure_id.count(":") == 2:
90
+ structure_id = ":".join(structure_id.split(":")[:2])
91
+ # Drop 'DATAFLOW' column from DataFrame
92
+ df_csv = data.drop(["DATAFLOW"], axis=1)
93
+
94
+ urn = f"Dataflow={structure_id}"
95
+ return PandasDataset(
96
+ structure=urn,
97
+ data=df_csv,
98
+ action=action if action is not None else ActionType.Information,
99
+ )
@@ -0,0 +1,118 @@
1
+ from copy import copy
2
+ from typing import List, Literal, Optional, Sequence
3
+
4
+ import pandas as pd
5
+
6
+ from pysdmx.io.pd import PandasDataset
7
+ from pysdmx.model import Schema
8
+ from pysdmx.model.dataset import ActionType
9
+ from pysdmx.toolkit.pd._data_utils import format_labels, get_codes
10
+
11
+ SDMX_CSV_ACTION_MAPPER = {
12
+ ActionType.Append: "A",
13
+ ActionType.Replace: "R",
14
+ ActionType.Information: "I",
15
+ ActionType.Delete: "D",
16
+ }
17
+
18
+
19
+ def __write_time_period(df: pd.DataFrame, time_format: str) -> None:
20
+ # TODO: Correctly handle the normalized time format
21
+ raise NotImplementedError("Normalized time format is not implemented yet.")
22
+
23
+
24
+ def __write_keys(
25
+ df: pd.DataFrame, keys: Literal["obs", "series", "both"], schema: Schema
26
+ ) -> None:
27
+ """Writes the keys to the DataFrame.
28
+
29
+ Args:
30
+ df: The DataFrame to write the keys to.
31
+ keys: Which key columns to write.
32
+ If None, no keys are written.
33
+ If "obs", the keys are written as a single
34
+ column called "OBS_KEY".
35
+ If "series", the keys are written as a single
36
+ column called "SERIES_KEY".
37
+ If "both", the keys are written as two columns:
38
+ "OBS_KEY" and "SERIES_KEY".
39
+ schema: The schema to get the keys
40
+ """
41
+ series_codes, obs_codes, group_codes = get_codes(
42
+ dimension_code="", structure=schema, data=df
43
+ )
44
+ del obs_codes[0]
45
+ obs_parts = []
46
+ series_parts = []
47
+ for k, v in df.items():
48
+ value = v.iloc[0]
49
+ if k in obs_codes:
50
+ obs_parts.append(str(value))
51
+ if k in series_codes:
52
+ series_parts.append(str(value))
53
+ obs_values = ".".join(obs_parts)
54
+ series_values = ".".join(series_parts)
55
+ if keys == "obs":
56
+ df.insert(0, "OBS_KEYS", obs_values)
57
+ elif keys == "series":
58
+ df.insert(0, "SERIES_KEYS", series_values)
59
+ else:
60
+ df.insert(0, "OBS_KEYS", obs_values)
61
+ df.insert(0, "SERIES_KEYS", series_values)
62
+
63
+
64
+ def _write_csv_2_aux(
65
+ datasets: Sequence[PandasDataset],
66
+ labels: Optional[Literal["name", "id", "both"]] = None,
67
+ time_format: Optional[Literal["original", "normalized"]] = None,
68
+ keys: Optional[Literal["obs", "series", "both"]] = None,
69
+ references_21: bool = False,
70
+ ) -> List[pd.DataFrame]:
71
+ dataframes = []
72
+ for dataset in datasets:
73
+ # Create a copy of the dataset
74
+ df: pd.DataFrame = copy(dataset.data)
75
+ structure_ref, unique_id = dataset.short_urn.split("=", maxsplit=1)
76
+
77
+ # Add additional attributes to the dataset
78
+ for k, v in dataset.attributes.items():
79
+ df[k] = v
80
+
81
+ if structure_ref in ["DataStructure", "Dataflow"]:
82
+ structure_ref = structure_ref.lower()
83
+ else:
84
+ structure_ref = "dataprovision"
85
+
86
+ if references_21 and dataset.action in [
87
+ ActionType.Information,
88
+ ActionType.Append,
89
+ ]:
90
+ action_value = "M"
91
+ else:
92
+ action_value = SDMX_CSV_ACTION_MAPPER[dataset.action]
93
+
94
+ if time_format is not None and time_format != "original":
95
+ __write_time_period(df, time_format)
96
+ if keys is not None and isinstance(dataset.structure, Schema):
97
+ __write_keys(df, keys, dataset.structure)
98
+ if labels is not None and isinstance(dataset.structure, Schema):
99
+ format_labels(df, labels, dataset.structure.components)
100
+ df.insert(0, "STRUCTURE", structure_ref)
101
+ df.insert(
102
+ 1,
103
+ "STRUCTURE_ID",
104
+ f"{unique_id}:{dataset.structure.name}"
105
+ if labels == "both"
106
+ else unique_id,
107
+ )
108
+ action_position = 2
109
+ if labels == "name":
110
+ action_position += 1
111
+ df.insert(2, "STRUCTURE_NAME", dataset.structure.name)
112
+ df.insert(action_position, "ACTION", action_value)
113
+ else:
114
+ df.insert(0, "STRUCTURE", structure_ref)
115
+ df.insert(1, "STRUCTURE_ID", unique_id)
116
+ df.insert(2, "ACTION", action_value)
117
+ dataframes.append(df)
118
+ return dataframes
@@ -6,23 +6,10 @@ from typing import Sequence
6
6
  import pandas as pd
7
7
 
8
8
  from pysdmx.errors import Invalid
9
+ from pysdmx.io.csv.__csv_aux_reader import __generate_dataset_from_sdmx_csv
9
10
  from pysdmx.io.pd import PandasDataset
10
11
 
11
12
 
12
- def __generate_dataset_from_sdmx_csv(data: pd.DataFrame) -> PandasDataset:
13
- # For SDMX-CSV version 1, use 'DATAFLOW' column as the structure id
14
- structure_id = data["DATAFLOW"].iloc[0]
15
- # Drop 'DATAFLOW' column from DataFrame
16
- df_csv = data.drop(["DATAFLOW"], axis=1)
17
- urn = f"Dataflow={structure_id}"
18
-
19
- # Return a Dataset object with the extracted information
20
- return PandasDataset(
21
- structure=urn,
22
- data=df_csv,
23
- )
24
-
25
-
26
13
  def read(input_str: str) -> Sequence[PandasDataset]:
27
14
  """Reads csv data and returns a sequence of Datasets.
28
15
 
@@ -56,6 +43,14 @@ def read(input_str: str) -> Sequence[PandasDataset]:
56
43
  # Check if any column headers contain ':', indicating mode, label or text
57
44
  mode_label_text = any(":" in x for x in df_csv.columns)
58
45
 
46
+ # if values in the columns contain ':', split them
47
+ for col in df_csv.columns[1:]:
48
+ df_csv[col] = (
49
+ df_csv[col]
50
+ .astype(str)
51
+ .apply(lambda x: x.split(":")[0] if ":" in x else x)
52
+ )
53
+
59
54
  # Determine the id column based on the SDMX-CSV version
60
55
  id_column = "DATAFLOW"
61
56
 
@@ -2,15 +2,20 @@
2
2
 
3
3
  from copy import copy
4
4
  from pathlib import Path
5
- from typing import Optional, Sequence, Union
5
+ from typing import Literal, Optional, Sequence, Union
6
6
 
7
7
  import pandas as pd
8
8
 
9
+ from pysdmx.io.csv.__csv_aux_writer import __write_time_period
9
10
  from pysdmx.io.pd import PandasDataset
11
+ from pysdmx.model import Schema
12
+ from pysdmx.toolkit.pd._data_utils import format_labels
10
13
 
11
14
 
12
15
  def write(
13
16
  datasets: Sequence[PandasDataset],
17
+ labels: Optional[Literal["id", "both"]] = None,
18
+ time_format: Optional[Literal["original", "normalized"]] = None,
14
19
  output_path: Optional[Union[str, Path]] = None,
15
20
  ) -> Optional[str]:
16
21
  """Write data to SDMX-CSV 1.0 format.
@@ -20,6 +25,15 @@ def write(
20
25
  Must have the same components.
21
26
  output_path: Path to write the data to.
22
27
  If None, the data is returned as a string.
28
+ labels: How to write the name of the columns.
29
+ If None, only the IDs are written.
30
+ If "id", the names are written as ID only.
31
+ If "both", the names are written as id:Name.
32
+ time_format: How to write the time period.
33
+ If None, the time period is not modified.
34
+ If "original", the time period is written as it
35
+ is in the dataset.
36
+ "Normalized" is not implemented yet.
23
37
 
24
38
  Returns:
25
39
  SDMX CSV data as a string, if output_path is None.
@@ -31,11 +45,23 @@ def write(
31
45
  dataframes = []
32
46
  for dataset in datasets:
33
47
  df: pd.DataFrame = copy(dataset.data)
34
- df.insert(0, "DATAFLOW", dataset.short_urn.split("=")[1])
35
48
 
36
49
  # Add additional attributes to the dataset
37
50
  for k, v in dataset.attributes.items():
38
51
  df[k] = v
52
+ structure_id = dataset.short_urn.split("=")[1]
53
+ if time_format is not None and time_format != "original":
54
+ __write_time_period(df, time_format)
55
+ if labels is not None and isinstance(dataset.structure, Schema):
56
+ format_labels(df, labels, dataset.structure.components)
57
+ if labels == "id":
58
+ df.insert(0, "DATAFLOW", structure_id)
59
+ else:
60
+ df.insert(
61
+ 0, "DATAFLOW", f"{structure_id}:{dataset.structure.name}"
62
+ )
63
+ else:
64
+ df.insert(0, "DATAFLOW", structure_id)
39
65
 
40
66
  dataframes.append(df)
41
67
 
@@ -1,10 +1 @@
1
1
  """SDMX 2.0 CSV reader and writer."""
2
-
3
- from pysdmx.model.dataset import ActionType
4
-
5
- SDMX_CSV_ACTION_MAPPER = {
6
- ActionType.Append: "A",
7
- ActionType.Replace: "R",
8
- ActionType.Information: "I",
9
- ActionType.Delete: "D",
10
- }
@@ -6,68 +6,8 @@ from typing import Sequence
6
6
  import pandas as pd
7
7
 
8
8
  from pysdmx.errors import Invalid
9
+ from pysdmx.io.csv.__csv_aux_reader import __generate_dataset_from_sdmx_csv
9
10
  from pysdmx.io.pd import PandasDataset
10
- from pysdmx.model.dataset import ActionType
11
-
12
- ACTION_SDMX_CSV_MAPPER_READING = {
13
- "A": ActionType.Append,
14
- "D": ActionType.Delete,
15
- "R": ActionType.Replace,
16
- "I": ActionType.Information,
17
- }
18
-
19
-
20
- def __generate_dataset_from_sdmx_csv(data: pd.DataFrame) -> PandasDataset:
21
- # Extract Structure type and structure id
22
- action = ActionType.Information
23
- if "ACTION" in data.columns:
24
- unique_values = list(data["ACTION"].unique())
25
- if len(unique_values) > 1 and "D" in unique_values:
26
- unique_values.remove("D")
27
- data = data[data["ACTION"] != "D"]
28
- if len(unique_values) == 1: # If there is only one value, use it
29
- action_value = unique_values[0]
30
- if action_value not in ACTION_SDMX_CSV_MAPPER_READING:
31
- raise Invalid(
32
- "Invalid value on ACTION column",
33
- "Invalid SDMX-CSV 2.0 file. "
34
- "Check the docs for the proper values on ACTION column.",
35
- )
36
- action = ACTION_SDMX_CSV_MAPPER_READING[action_value]
37
- del data["ACTION"] # Remove ACTION column from DataFrame
38
- else:
39
- raise Invalid(
40
- "Invalid value on ACTION column",
41
- "Invalid SDMX-CSV 2.0 file. "
42
- "Cannot have more than one value on ACTION column, "
43
- "or 2 if D is present",
44
- )
45
- # For SDMX-CSV version 2, use 'STRUCTURE_ID'
46
- # column as the structure id and 'STRUCTURE' as the structure type
47
- structure_id = data["STRUCTURE_ID"].iloc[0]
48
- structure_type = data["STRUCTURE"].iloc[0]
49
- # Drop 'STRUCTURE' and 'STRUCTURE_ID' columns from DataFrame
50
- df_csv = data.drop(["STRUCTURE", "STRUCTURE_ID"], axis=1)
51
-
52
- if structure_type == "DataStructure".lower():
53
- urn = f"DataStructure={structure_id}"
54
- elif structure_type == "Dataflow".lower():
55
- urn = f"Dataflow={structure_id}"
56
- elif structure_type == "dataprovision":
57
- urn = f"ProvisionAgreement={structure_id}"
58
- else:
59
- raise Invalid(
60
- "Invalid value on STRUCTURE column",
61
- "Invalid SDMX-CSV 2.0 file. "
62
- "Check the docs for the proper values on STRUCTURE column.",
63
- )
64
-
65
- # Return a Dataset object with the extracted information
66
- return PandasDataset(
67
- structure=urn,
68
- data=df_csv,
69
- action=action,
70
- )
71
11
 
72
12
 
73
13
  def read(input_str: str) -> Sequence[PandasDataset]:
@@ -105,6 +45,13 @@ def read(input_str: str) -> Sequence[PandasDataset]:
105
45
  df_csv = df_csv.astype(str).replace({"nan": "", "<NA>": ""})
106
46
  # Check if any column headers contain ':', indicating mode, label or text
107
47
  mode_label_text = any(":" in x for x in df_csv.columns)
48
+ # if values in the columns contain ':', split them
49
+ for col in df_csv.columns[2:]:
50
+ df_csv[col] = (
51
+ df_csv[col]
52
+ .astype(str)
53
+ .apply(lambda x: x.split(":")[0] if ":" in x else x)
54
+ )
108
55
 
109
56
  id_column = "STRUCTURE_ID"
110
57
  # If mode, label or text is present, modify the DataFrame