pysdmx 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysdmx/__init__.py +1 -1
- pysdmx/api/fmr/__init__.py +8 -3
- pysdmx/api/fmr/maintenance.py +158 -0
- pysdmx/api/qb/structure.py +1 -0
- pysdmx/api/qb/util.py +1 -0
- pysdmx/io/csv/__csv_aux_reader.py +99 -0
- pysdmx/io/csv/__csv_aux_writer.py +118 -0
- pysdmx/io/csv/sdmx10/reader/__init__.py +9 -14
- pysdmx/io/csv/sdmx10/writer/__init__.py +28 -2
- pysdmx/io/csv/sdmx20/__init__.py +0 -9
- pysdmx/io/csv/sdmx20/reader/__init__.py +8 -61
- pysdmx/io/csv/sdmx20/writer/__init__.py +32 -25
- pysdmx/io/csv/sdmx21/__init__.py +1 -0
- pysdmx/io/csv/sdmx21/reader/__init__.py +86 -0
- pysdmx/io/csv/sdmx21/writer/__init__.py +70 -0
- pysdmx/io/format.py +8 -0
- pysdmx/io/input_processor.py +16 -2
- pysdmx/io/json/fusion/messages/code.py +21 -4
- pysdmx/io/json/fusion/messages/concept.py +16 -8
- pysdmx/io/json/fusion/messages/dataflow.py +8 -1
- pysdmx/io/json/fusion/messages/dsd.py +15 -0
- pysdmx/io/json/fusion/messages/schema.py +8 -1
- pysdmx/io/json/sdmxjson2/messages/agency.py +43 -7
- pysdmx/io/json/sdmxjson2/messages/category.py +92 -7
- pysdmx/io/json/sdmxjson2/messages/code.py +239 -18
- pysdmx/io/json/sdmxjson2/messages/concept.py +78 -13
- pysdmx/io/json/sdmxjson2/messages/constraint.py +5 -5
- pysdmx/io/json/sdmxjson2/messages/core.py +121 -14
- pysdmx/io/json/sdmxjson2/messages/dataflow.py +63 -8
- pysdmx/io/json/sdmxjson2/messages/dsd.py +215 -20
- pysdmx/io/json/sdmxjson2/messages/map.py +200 -24
- pysdmx/io/json/sdmxjson2/messages/pa.py +36 -5
- pysdmx/io/json/sdmxjson2/messages/provider.py +35 -7
- pysdmx/io/json/sdmxjson2/messages/report.py +85 -7
- pysdmx/io/json/sdmxjson2/messages/schema.py +11 -12
- pysdmx/io/json/sdmxjson2/messages/structure.py +150 -2
- pysdmx/io/json/sdmxjson2/messages/vtl.py +547 -17
- pysdmx/io/json/sdmxjson2/reader/metadata.py +32 -0
- pysdmx/io/json/sdmxjson2/reader/structure.py +32 -0
- pysdmx/io/json/sdmxjson2/writer/__init__.py +9 -0
- pysdmx/io/json/sdmxjson2/writer/metadata.py +60 -0
- pysdmx/io/json/sdmxjson2/writer/structure.py +61 -0
- pysdmx/io/reader.py +28 -9
- pysdmx/io/serde.py +17 -0
- pysdmx/io/writer.py +45 -9
- pysdmx/io/xml/__structure_aux_reader.py +2 -2
- pysdmx/io/xml/__structure_aux_writer.py +5 -5
- pysdmx/io/xml/__write_data_aux.py +1 -54
- pysdmx/io/xml/__write_structure_specific_aux.py +1 -1
- pysdmx/io/xml/sdmx21/writer/generic.py +1 -1
- pysdmx/model/code.py +11 -1
- pysdmx/model/dataflow.py +26 -3
- pysdmx/model/map.py +12 -4
- pysdmx/model/message.py +9 -1
- pysdmx/toolkit/pd/_data_utils.py +100 -0
- pysdmx/toolkit/vtl/_validations.py +2 -3
- {pysdmx-1.5.1.dist-info → pysdmx-1.6.0.dist-info}/METADATA +3 -2
- {pysdmx-1.5.1.dist-info → pysdmx-1.6.0.dist-info}/RECORD +60 -48
- {pysdmx-1.5.1.dist-info → pysdmx-1.6.0.dist-info}/WHEEL +1 -1
- {pysdmx-1.5.1.dist-info → pysdmx-1.6.0.dist-info/licenses}/LICENSE +0 -0
pysdmx/__init__.py
CHANGED
pysdmx/api/fmr/__init__.py
CHANGED
|
@@ -571,13 +571,16 @@ class RegistryClient(__BaseRegistryClient):
|
|
|
571
571
|
d = DataflowDetails(detail) if isinstance(detail, str) else detail
|
|
572
572
|
sq, dr = super()._df_details(d)
|
|
573
573
|
if sq:
|
|
574
|
-
|
|
574
|
+
schema = self.get_schema("dataflow", agency, id, version)
|
|
575
|
+
cmps = schema.components
|
|
576
|
+
grps = schema.groups
|
|
575
577
|
else:
|
|
576
578
|
cmps = None
|
|
579
|
+
grps = None
|
|
577
580
|
query = super()._dataflow_details_q(agency, id, version, dr)
|
|
578
581
|
out = self.__fetch(query)
|
|
579
582
|
return super()._out(
|
|
580
|
-
out, self.deser.dataflow_info, cmps, agency, id, version
|
|
583
|
+
out, self.deser.dataflow_info, cmps, grps, agency, id, version
|
|
581
584
|
)
|
|
582
585
|
|
|
583
586
|
def get_dataflows(
|
|
@@ -1034,12 +1037,14 @@ class AsyncRegistryClient(__BaseRegistryClient):
|
|
|
1034
1037
|
version,
|
|
1035
1038
|
)
|
|
1036
1039
|
cmps = schema.components
|
|
1040
|
+
grps = schema.groups
|
|
1037
1041
|
else:
|
|
1038
1042
|
cmps = None
|
|
1043
|
+
grps = None
|
|
1039
1044
|
query = super()._dataflow_details_q(agency, id, version, dr)
|
|
1040
1045
|
out = await self.__fetch(query)
|
|
1041
1046
|
return super()._out(
|
|
1042
|
-
out, self.deser.dataflow_info, cmps, agency, id, version
|
|
1047
|
+
out, self.deser.dataflow_info, cmps, grps, agency, id, version
|
|
1043
1048
|
)
|
|
1044
1049
|
|
|
1045
1050
|
async def get_dataflows(
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Upload metadata to an FMR instance."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
import msgspec
|
|
8
|
+
|
|
9
|
+
from pysdmx.io.json.sdmxjson2.writer import serializers
|
|
10
|
+
from pysdmx.model import MetadataReport
|
|
11
|
+
from pysdmx.model.__base import MaintainableArtefact
|
|
12
|
+
from pysdmx.model.message import (
|
|
13
|
+
Header,
|
|
14
|
+
MetadataMessage,
|
|
15
|
+
StructureMessage,
|
|
16
|
+
)
|
|
17
|
+
from pysdmx.util._net_utils import map_httpx_errors
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class StructureAction(Enum):
    """Enumeration that defines the action when updating metadata in the FMR.

    Attributes:
        Append: Metadata uploaded with action 'Append' may only add new
            metadata and may not overwrite any existing metadata, i.e. any
            attempt to update existing metadata will be rejected.
        Merge: Metadata uploaded with action 'Merge' may add new metadata and
            replace existing metadata. However, for Item Schemes (codelists,
            concept schemes, etc.), the items submitted will be added to the
            existing scheme. For example, if a codelist exists with codes A, B,
            and C, and the same codelist is submitted with codes B and X, then
            the resulting codelist will have codes A, B, C, X, i.e. code B has
            been replaced while code X has been added.
        Replace: Metadata uploaded with action 'Replace' may add new metadata,
            and can also replace existing metadata with new ones. This is the
            default.
    """

    # The member values are sent verbatim as the "Action" HTTP header.
    Append = "Append"
    Merge = "Merge"
    Replace = "Replace"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class RegistryMaintenanceClient:
    """EXPERIMENTAL: A client to update metadata in the FMR."""

    def __init__(
        self,
        api_endpoint: str,
        user: str,
        password: str,
        pem: Optional[str] = None,
        timeout: float = 60.0,
    ):
        """Instantiate a new client to update metadata in the target endpoint.

        Args:
            api_endpoint: The endpoint of the targeted service. Any trailing
                slashes are removed, as the paths appended by this client
                start with a slash.
            user: Username for authentication.
            password: Password for authentication.
            pem: In case the service exposes a certificate created by an
                unknown certificate authority, you can pass a pem file for
                this authority using this parameter.
            timeout: The maximum number of seconds to wait before considering
                that a request timed out. Defaults to 60 seconds.
        """
        # Strip every trailing slash (not just one) so that the URLs built
        # in the put_* methods never contain "//" before "ws/...".
        self._api_endpoint = api_endpoint.rstrip("/")
        self._user = user
        self._password = password
        self._timeout = timeout
        self._ssl_context = (
            httpx.create_ssl_context(verify=pem)
            if pem
            else httpx.create_ssl_context()
        )
        self._encoder = msgspec.json.Encoder()

    def __post(
        self,
        message: Union[MetadataMessage, StructureMessage],
        action: StructureAction,
        endpoint: str,
    ) -> None:
        """Serialize a message with the SDMX-JSON 2 writer and POST it.

        Args:
            message: The metadata or structure message to upload. The
                serializer is chosen from the message type.
            action: The action to apply; its value is sent in the
                ``Action`` HTTP header.
            endpoint: The full URL to post to.
        """
        if isinstance(message, MetadataMessage):
            serializer = serializers.metadata_message
        else:
            serializer = serializers.structure_message
        # Serialization does not involve httpx, so it is kept outside of the
        # try block that only deals with request/response errors.
        body = self._encoder.encode(serializer.from_model(message))
        # NOTE(review): the body is JSON but is declared as application/text;
        # presumably this is what the FMR endpoint expects - confirm.
        headers = {
            "Content-Type": "application/text",
            "Action": action.value,
        }
        auth = httpx.BasicAuth(self._user, self._password)
        with httpx.Client(verify=self._ssl_context) as client:
            try:
                response = client.post(
                    endpoint,
                    headers=headers,
                    content=body,
                    timeout=self._timeout,
                    auth=auth,
                )
                response.raise_for_status()
            except (httpx.RequestError, httpx.HTTPStatusError) as e:
                # map_httpx_errors is defined elsewhere; presumably it
                # re-raises the failure as a pysdmx error - confirm.
                map_httpx_errors(e)

    def put_structures(
        self,
        artefacts: Sequence[MaintainableArtefact],
        header: Optional[Header] = None,
        action: StructureAction = StructureAction.Replace,
    ) -> None:
        """EXPERIMENTAL: Upload SDMX structures to the FMR.

        This method is experimental and its interface or behavior may change
        without notice.

        Args:
            artefacts: The sequence of SDMX maintainable artefacts to upload.
            header: Optional SDMX Header to include in the message. If not
                supplied, pysdmx will generate one for you.
            action: How to apply the changes in case of already existing
                structures.
        """
        if not header:
            header = Header()
        message = StructureMessage(header=header, structures=artefacts)
        endpoint = f"{self._api_endpoint}/ws/secure/sdmxapi/rest"
        self.__post(message, action, endpoint)

    def put_metadata_reports(
        self,
        reports: Sequence[MetadataReport],
        header: Optional[Header] = None,
        action: StructureAction = StructureAction.Replace,
    ) -> None:
        """EXPERIMENTAL: Upload SDMX metadata reports to the FMR.

        This method is experimental and its interface or behavior may change
        without notice.

        Args:
            reports: A sequence of metadata reports to upload.
            header: Optional SDMX Header to include in the message. If not
                supplied, pysdmx will generate one for you.
            action: How to apply the changes in case of already existing
                structures.
        """
        if not header:
            header = Header()
        message = MetadataMessage(header=header, reports=reports)
        endpoint = f"{self._api_endpoint}/ws/secure/sdmx/v2/metadata"
        self.__post(message, action, endpoint)
|
pysdmx/api/qb/structure.py
CHANGED
pysdmx/api/qb/util.py
CHANGED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from pysdmx.errors import Invalid
|
|
4
|
+
from pysdmx.io.pd import PandasDataset
|
|
5
|
+
from pysdmx.model.dataset import ActionType
|
|
6
|
+
|
|
7
|
+
# Maps the single-letter codes read from the ACTION column of an SDMX-CSV
# file to the corresponding ActionType member. The code "M" is not listed
# here; it is handled separately by the reading function.
ACTION_SDMX_CSV_MAPPER_READING = {
    "A": ActionType.Append,
    "D": ActionType.Delete,
    "R": ActionType.Replace,
    "I": ActionType.Information,
}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def __generate_dataset_from_sdmx_csv(
    data: pd.DataFrame,
    references_21: bool = False,
) -> PandasDataset:
    """Build a PandasDataset from a DataFrame holding SDMX-CSV content.

    When both the ``STRUCTURE`` and ``STRUCTURE_ID`` columns are present,
    the structure type and id are taken from the first row, the ``ACTION``
    column (if any) is translated into an ``ActionType``, and the
    ``STRUCTURE``, ``STRUCTURE_ID``, ``ACTION``, ``STRUCTURE_NAME``,
    ``SERIES_KEYS`` and ``OBS_KEYS`` columns are left out of the resulting
    data. Otherwise, the ``DATAFLOW`` column of the first row is used as
    the structure id.

    The input DataFrame is not modified.

    Args:
        data: The DataFrame to convert.
        references_21: Whether the action code ``M`` is accepted in the
            ``ACTION`` column (it is then read as ``ActionType.Append``).

    Returns:
        A PandasDataset with a short URN as structure, the remaining
        columns as data and the detected action
        (``ActionType.Information`` when there is no ``ACTION`` column).

    Raises:
        Invalid: If the ``ACTION`` or ``STRUCTURE`` column holds values
            that are not supported.
    """
    if not {"STRUCTURE", "STRUCTURE_ID"}.issubset(data.columns):
        # For SDMX-CSV version 1, use 'DATAFLOW' column as the structure id
        structure_id = _strip_structure_label(data["DATAFLOW"].iloc[0])
        return PandasDataset(
            structure=f"Dataflow={structure_id}",
            data=data.drop(["DATAFLOW"], axis=1),
            action=ActionType.Information,
        )

    action = ActionType.Information
    if "ACTION" in data.columns:
        data, action = _extract_action(data, references_21)

    # Remove columns that are not needed (missing ones are ignored)
    data = data.drop(
        columns=["STRUCTURE_NAME", "SERIES_KEYS", "OBS_KEYS"],
        errors="ignore",
    )

    # For SDMX-CSV version 2, use 'STRUCTURE_ID'
    # column as the structure id and 'STRUCTURE' as the structure type
    structure_id = _strip_structure_label(data["STRUCTURE_ID"].iloc[0])
    structure_type = data["STRUCTURE"].iloc[0]
    urn_prefix = _STRUCTURE_URN_PREFIXES.get(structure_type)
    if urn_prefix is None:
        raise Invalid(
            "Invalid value on STRUCTURE column",
            "Invalid SDMX-CSV file. "
            "Check the docs for the proper values on STRUCTURE column.",
        )
    return PandasDataset(
        structure=f"{urn_prefix}={structure_id}",
        data=data.drop(["STRUCTURE", "STRUCTURE_ID"], axis=1),
        action=action,
    )


# Maps the value of the STRUCTURE column to the prefix of the short URN.
_STRUCTURE_URN_PREFIXES = {
    "datastructure": "DataStructure",
    "dataflow": "Dataflow",
    "dataprovision": "ProvisionAgreement",
}


def _strip_structure_label(structure_id: str) -> str:
    """Keep the first two ':'-separated parts of an id made of three parts."""
    if structure_id.count(":") == 2:
        return ":".join(structure_id.split(":")[:2])
    return structure_id


def _extract_action(
    data: pd.DataFrame, references_21: bool
) -> "tuple[pd.DataFrame, ActionType]":
    """Translate the ACTION column into an ActionType and drop the column."""
    unique_values = list(data["ACTION"].unique())
    # "D" may be mixed with one other action: the "D" rows are then left out
    # and the other action is the one reported for the dataset.
    if len(unique_values) > 1 and "D" in unique_values:
        unique_values.remove("D")
        data = data[data["ACTION"] != "D"]
    if len(unique_values) != 1:
        raise Invalid(
            "Invalid value on ACTION column",
            "Invalid SDMX-CSV file. "
            "Cannot have more than one value on ACTION column, "
            "or 2 if D is present",
        )

    action_value = unique_values[0]
    if action_value == "M":
        if not references_21:
            raise Invalid(
                "Invalid value on ACTION column",
                "Value 'M' is only allowed for SDMX-CSV 2.1 files.",
            )
        action = ActionType.Append
    elif action_value in ACTION_SDMX_CSV_MAPPER_READING:
        action = ACTION_SDMX_CSV_MAPPER_READING[action_value]
    else:
        raise Invalid(
            "Invalid value on ACTION column",
            "Invalid SDMX-CSV file. "
            "Check the docs for the proper values "
            "on ACTION column.",
        )
    # drop() returns a new frame, so the DataFrame received from the caller
    # is never modified in place.
    return data.drop(columns=["ACTION"]), action
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from copy import copy
|
|
2
|
+
from typing import List, Literal, Optional, Sequence
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from pysdmx.io.pd import PandasDataset
|
|
7
|
+
from pysdmx.model import Schema
|
|
8
|
+
from pysdmx.model.dataset import ActionType
|
|
9
|
+
from pysdmx.toolkit.pd._data_utils import format_labels, get_codes
|
|
10
|
+
|
|
11
|
+
# Maps each ActionType member to the single-letter code written in the
# ACTION column of an SDMX-CSV file.
SDMX_CSV_ACTION_MAPPER = {
    ActionType.Append: "A",
    ActionType.Replace: "R",
    ActionType.Information: "I",
    ActionType.Delete: "D",
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def __write_time_period(df: pd.DataFrame, time_format: str) -> None:
    """Placeholder for writing time periods using the requested format.

    Neither argument is used yet: the function always raises.

    Args:
        df: The DataFrame whose time periods would be rewritten.
        time_format: The requested time format.

    Raises:
        NotImplementedError: Always, as this is not implemented yet.
    """
    # TODO: Correct handle of normalized time format
    raise NotImplementedError("Normalized time format is not implemented yet.")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def __write_keys(
    df: pd.DataFrame, keys: Literal["obs", "series", "both"], schema: Schema
) -> None:
    """Writes the keys to the DataFrame, in place.

    Each key is computed row by row, by joining with a dot the values of
    the relevant columns, in the order in which they appear in the
    DataFrame.

    Args:
        df: The DataFrame to write the keys to.
        keys: The key columns to write.
            If "obs", the keys are written as a single
            column called "OBS_KEYS".
            If "series", the keys are written as a single
            column called "SERIES_KEYS".
            Any other value (e.g. "both") writes two columns:
            "SERIES_KEYS" followed by "OBS_KEYS".
        schema: The schema to get the keys
    """
    series_codes, obs_codes, _ = get_codes(
        dimension_code="", structure=schema, data=df
    )
    # NOTE(review): the first entry of obs_codes is discarded; presumably it
    # is the placeholder for the (empty) dimension_code - confirm.
    del obs_codes[0]

    # Compute both keys before inserting anything, so that the new columns
    # cannot take part in the computation.
    obs_values = _join_key_columns(
        df, [c for c in df.columns if c in obs_codes]
    )
    series_values = _join_key_columns(
        df, [c for c in df.columns if c in series_codes]
    )
    if keys == "obs":
        df.insert(0, "OBS_KEYS", obs_values)
    elif keys == "series":
        df.insert(0, "SERIES_KEYS", series_values)
    else:
        df.insert(0, "OBS_KEYS", obs_values)
        df.insert(0, "SERIES_KEYS", series_values)


def _join_key_columns(df, columns):
    """Join, row by row, the string values of the columns with a dot."""
    if not columns:
        # No matching column: an empty key is written on every row.
        return ""
    joined = df[columns[0]].map(str)
    for column in columns[1:]:
        joined = joined + "." + df[column].map(str)
    return joined
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _write_csv_2_aux(
    datasets: Sequence[PandasDataset],
    labels: Optional[Literal["name", "id", "both"]] = None,
    time_format: Optional[Literal["original", "normalized"]] = None,
    keys: Optional[Literal["obs", "series", "both"]] = None,
    references_21: bool = False,
) -> List[pd.DataFrame]:
    """Prepare one DataFrame per dataset, with the SDMX-CSV leading columns.

    Each DataFrame is a copy of the dataset data, extended with the dataset
    attributes and starting with the STRUCTURE, STRUCTURE_ID, (optionally)
    STRUCTURE_NAME and ACTION columns.

    Args:
        datasets: The datasets to convert.
        labels: How to write labels. Only applied to datasets whose
            structure is a Schema. "both" appends the structure name to
            the STRUCTURE_ID value, while "name" adds a STRUCTURE_NAME
            column.
        time_format: Any value other than None or "original" triggers the
            time period conversion.
        keys: The key columns to add. Only applied to datasets whose
            structure is a Schema.
        references_21: If True, the Information and Append actions are
            written as "M".

    Returns:
        The list of prepared DataFrames, in the order of the datasets.
    """
    convert_time = time_format is not None and time_format != "original"
    merged_actions = (ActionType.Information, ActionType.Append)
    result = []
    for dataset in datasets:
        # Work on a copy, to leave the dataset untouched
        frame: pd.DataFrame = copy(dataset.data)
        ref_type, ref_id = dataset.short_urn.split("=", maxsplit=1)

        # Dataset-level attributes become regular columns
        for attr_id, attr_value in dataset.attributes.items():
            frame[attr_id] = attr_value

        # Anything that is neither a DSD nor a dataflow is written
        # as a data provision.
        if ref_type in ("DataStructure", "Dataflow"):
            ref_type = ref_type.lower()
        else:
            ref_type = "dataprovision"

        if references_21 and dataset.action in merged_actions:
            action_code = "M"
        else:
            action_code = SDMX_CSV_ACTION_MAPPER[dataset.action]

        has_schema = isinstance(dataset.structure, Schema)
        if convert_time:
            __write_time_period(frame, time_format)
        if keys is not None and has_schema:
            __write_keys(frame, keys, dataset.structure)

        use_labels = labels is not None and has_schema
        if use_labels:
            format_labels(frame, labels, dataset.structure.components)

        structure_id = ref_id
        if use_labels and labels == "both":
            structure_id = f"{ref_id}:{dataset.structure.name}"

        frame.insert(0, "STRUCTURE", ref_type)
        frame.insert(1, "STRUCTURE_ID", structure_id)
        if use_labels and labels == "name":
            # The structure name sits between the id and the action
            frame.insert(2, "STRUCTURE_NAME", dataset.structure.name)
            frame.insert(3, "ACTION", action_code)
        else:
            frame.insert(2, "ACTION", action_code)
        result.append(frame)
    return result
|
|
@@ -6,23 +6,10 @@ from typing import Sequence
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
8
|
from pysdmx.errors import Invalid
|
|
9
|
+
from pysdmx.io.csv.__csv_aux_reader import __generate_dataset_from_sdmx_csv
|
|
9
10
|
from pysdmx.io.pd import PandasDataset
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
def __generate_dataset_from_sdmx_csv(data: pd.DataFrame) -> PandasDataset:
|
|
13
|
-
# For SDMX-CSV version 1, use 'DATAFLOW' column as the structure id
|
|
14
|
-
structure_id = data["DATAFLOW"].iloc[0]
|
|
15
|
-
# Drop 'DATAFLOW' column from DataFrame
|
|
16
|
-
df_csv = data.drop(["DATAFLOW"], axis=1)
|
|
17
|
-
urn = f"Dataflow={structure_id}"
|
|
18
|
-
|
|
19
|
-
# Return a Dataset object with the extracted information
|
|
20
|
-
return PandasDataset(
|
|
21
|
-
structure=urn,
|
|
22
|
-
data=df_csv,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
|
|
26
13
|
def read(input_str: str) -> Sequence[PandasDataset]:
|
|
27
14
|
"""Reads csv data and returns a sequence of Datasets.
|
|
28
15
|
|
|
@@ -56,6 +43,14 @@ def read(input_str: str) -> Sequence[PandasDataset]:
|
|
|
56
43
|
# Check if any column headers contain ':', indicating mode, label or text
|
|
57
44
|
mode_label_text = any(":" in x for x in df_csv.columns)
|
|
58
45
|
|
|
46
|
+
# if values in the columns contain ':', split them
|
|
47
|
+
for col in df_csv.columns[1:]:
|
|
48
|
+
df_csv[col] = (
|
|
49
|
+
df_csv[col]
|
|
50
|
+
.astype(str)
|
|
51
|
+
.apply(lambda x: x.split(":")[0] if ":" in x else x)
|
|
52
|
+
)
|
|
53
|
+
|
|
59
54
|
# Determine the id column based on the SDMX-CSV version
|
|
60
55
|
id_column = "DATAFLOW"
|
|
61
56
|
|
|
@@ -2,15 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
from copy import copy
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Optional, Sequence, Union
|
|
5
|
+
from typing import Literal, Optional, Sequence, Union
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
|
|
9
|
+
from pysdmx.io.csv.__csv_aux_writer import __write_time_period
|
|
9
10
|
from pysdmx.io.pd import PandasDataset
|
|
11
|
+
from pysdmx.model import Schema
|
|
12
|
+
from pysdmx.toolkit.pd._data_utils import format_labels
|
|
10
13
|
|
|
11
14
|
|
|
12
15
|
def write(
|
|
13
16
|
datasets: Sequence[PandasDataset],
|
|
17
|
+
labels: Optional[Literal["id", "both"]] = None,
|
|
18
|
+
time_format: Optional[Literal["original", "normalized"]] = None,
|
|
14
19
|
output_path: Optional[Union[str, Path]] = None,
|
|
15
20
|
) -> Optional[str]:
|
|
16
21
|
"""Write data to SDMX-CSV 1.0 format.
|
|
@@ -20,6 +25,15 @@ def write(
|
|
|
20
25
|
Must have the same components.
|
|
21
26
|
output_path: Path to write the data to.
|
|
22
27
|
If None, the data is returned as a string.
|
|
28
|
+
labels: How to write the name of the columns.
|
|
29
|
+
If None, only the IDs are written.
|
|
30
|
+
if "id", the names are written as ID only.
|
|
31
|
+
If "both", the names are witten as id:Name.
|
|
32
|
+
time_format: How to write the time period.
|
|
33
|
+
If None, the time period is not modified.
|
|
34
|
+
If "original", the time period is written as it
|
|
35
|
+
is in the dataset.
|
|
36
|
+
"Normalized" is not implemented yet.
|
|
23
37
|
|
|
24
38
|
Returns:
|
|
25
39
|
SDMX CSV data as a string, if output_path is None.
|
|
@@ -31,11 +45,23 @@ def write(
|
|
|
31
45
|
dataframes = []
|
|
32
46
|
for dataset in datasets:
|
|
33
47
|
df: pd.DataFrame = copy(dataset.data)
|
|
34
|
-
df.insert(0, "DATAFLOW", dataset.short_urn.split("=")[1])
|
|
35
48
|
|
|
36
49
|
# Add additional attributes to the dataset
|
|
37
50
|
for k, v in dataset.attributes.items():
|
|
38
51
|
df[k] = v
|
|
52
|
+
structure_id = dataset.short_urn.split("=")[1]
|
|
53
|
+
if time_format is not None and time_format != "original":
|
|
54
|
+
__write_time_period(df, time_format)
|
|
55
|
+
if labels is not None and isinstance(dataset.structure, Schema):
|
|
56
|
+
format_labels(df, labels, dataset.structure.components)
|
|
57
|
+
if labels == "id":
|
|
58
|
+
df.insert(0, "DATAFLOW", structure_id)
|
|
59
|
+
else:
|
|
60
|
+
df.insert(
|
|
61
|
+
0, "DATAFLOW", f"{structure_id}:{dataset.structure.name}"
|
|
62
|
+
)
|
|
63
|
+
else:
|
|
64
|
+
df.insert(0, "DATAFLOW", structure_id)
|
|
39
65
|
|
|
40
66
|
dataframes.append(df)
|
|
41
67
|
|
pysdmx/io/csv/sdmx20/__init__.py
CHANGED
|
@@ -6,68 +6,8 @@ from typing import Sequence
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
8
|
from pysdmx.errors import Invalid
|
|
9
|
+
from pysdmx.io.csv.__csv_aux_reader import __generate_dataset_from_sdmx_csv
|
|
9
10
|
from pysdmx.io.pd import PandasDataset
|
|
10
|
-
from pysdmx.model.dataset import ActionType
|
|
11
|
-
|
|
12
|
-
ACTION_SDMX_CSV_MAPPER_READING = {
|
|
13
|
-
"A": ActionType.Append,
|
|
14
|
-
"D": ActionType.Delete,
|
|
15
|
-
"R": ActionType.Replace,
|
|
16
|
-
"I": ActionType.Information,
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def __generate_dataset_from_sdmx_csv(data: pd.DataFrame) -> PandasDataset:
|
|
21
|
-
# Extract Structure type and structure id
|
|
22
|
-
action = ActionType.Information
|
|
23
|
-
if "ACTION" in data.columns:
|
|
24
|
-
unique_values = list(data["ACTION"].unique())
|
|
25
|
-
if len(unique_values) > 1 and "D" in unique_values:
|
|
26
|
-
unique_values.remove("D")
|
|
27
|
-
data = data[data["ACTION"] != "D"]
|
|
28
|
-
if len(unique_values) == 1: # If there is only one value, use it
|
|
29
|
-
action_value = unique_values[0]
|
|
30
|
-
if action_value not in ACTION_SDMX_CSV_MAPPER_READING:
|
|
31
|
-
raise Invalid(
|
|
32
|
-
"Invalid value on ACTION column",
|
|
33
|
-
"Invalid SDMX-CSV 2.0 file. "
|
|
34
|
-
"Check the docs for the proper values on ACTION column.",
|
|
35
|
-
)
|
|
36
|
-
action = ACTION_SDMX_CSV_MAPPER_READING[action_value]
|
|
37
|
-
del data["ACTION"] # Remove ACTION column from DataFrame
|
|
38
|
-
else:
|
|
39
|
-
raise Invalid(
|
|
40
|
-
"Invalid value on ACTION column",
|
|
41
|
-
"Invalid SDMX-CSV 2.0 file. "
|
|
42
|
-
"Cannot have more than one value on ACTION column, "
|
|
43
|
-
"or 2 if D is present",
|
|
44
|
-
)
|
|
45
|
-
# For SDMX-CSV version 2, use 'STRUCTURE_ID'
|
|
46
|
-
# column as the structure id and 'STRUCTURE' as the structure type
|
|
47
|
-
structure_id = data["STRUCTURE_ID"].iloc[0]
|
|
48
|
-
structure_type = data["STRUCTURE"].iloc[0]
|
|
49
|
-
# Drop 'STRUCTURE' and 'STRUCTURE_ID' columns from DataFrame
|
|
50
|
-
df_csv = data.drop(["STRUCTURE", "STRUCTURE_ID"], axis=1)
|
|
51
|
-
|
|
52
|
-
if structure_type == "DataStructure".lower():
|
|
53
|
-
urn = f"DataStructure={structure_id}"
|
|
54
|
-
elif structure_type == "Dataflow".lower():
|
|
55
|
-
urn = f"Dataflow={structure_id}"
|
|
56
|
-
elif structure_type == "dataprovision":
|
|
57
|
-
urn = f"ProvisionAgreement={structure_id}"
|
|
58
|
-
else:
|
|
59
|
-
raise Invalid(
|
|
60
|
-
"Invalid value on STRUCTURE column",
|
|
61
|
-
"Invalid SDMX-CSV 2.0 file. "
|
|
62
|
-
"Check the docs for the proper values on STRUCTURE column.",
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
# Return a Dataset object with the extracted information
|
|
66
|
-
return PandasDataset(
|
|
67
|
-
structure=urn,
|
|
68
|
-
data=df_csv,
|
|
69
|
-
action=action,
|
|
70
|
-
)
|
|
71
11
|
|
|
72
12
|
|
|
73
13
|
def read(input_str: str) -> Sequence[PandasDataset]:
|
|
@@ -105,6 +45,13 @@ def read(input_str: str) -> Sequence[PandasDataset]:
|
|
|
105
45
|
df_csv = df_csv.astype(str).replace({"nan": "", "<NA>": ""})
|
|
106
46
|
# Check if any column headers contain ':', indicating mode, label or text
|
|
107
47
|
mode_label_text = any(":" in x for x in df_csv.columns)
|
|
48
|
+
# if values in the columns contain ':', split them
|
|
49
|
+
for col in df_csv.columns[2:]:
|
|
50
|
+
df_csv[col] = (
|
|
51
|
+
df_csv[col]
|
|
52
|
+
.astype(str)
|
|
53
|
+
.apply(lambda x: x.split(":")[0] if ":" in x else x)
|
|
54
|
+
)
|
|
108
55
|
|
|
109
56
|
id_column = "STRUCTURE_ID"
|
|
110
57
|
# If mode, label or text is present, modify the DataFrame
|