pysdmx 1.10.0__py3-none-any.whl → 1.10.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysdmx/__init__.py +1 -1
- pysdmx/io/_pd_utils.py +83 -0
- pysdmx/io/csv/__csv_aux_writer.py +23 -0
- pysdmx/io/csv/sdmx10/reader/__init__.py +1 -1
- pysdmx/io/csv/sdmx10/writer/__init__.py +15 -9
- pysdmx/io/csv/sdmx20/reader/__init__.py +1 -1
- pysdmx/io/csv/sdmx20/writer/__init__.py +1 -1
- pysdmx/io/csv/sdmx21/reader/__init__.py +1 -1
- pysdmx/io/csv/sdmx21/writer/__init__.py +1 -1
- pysdmx/io/json/sdmxjson2/reader/doc_validation.py +0 -4
- pysdmx/io/xml/__write_data_aux.py +20 -7
- pysdmx/io/xml/__write_structure_specific_aux.py +71 -54
- pysdmx/io/xml/sdmx21/writer/generic.py +31 -19
- pysdmx/model/concept.py +16 -0
- {pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/METADATA +1 -1
- {pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/RECORD +18 -17
- {pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/WHEEL +0 -0
- {pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/licenses/LICENSE +0 -0
pysdmx/__init__.py
CHANGED
pysdmx/io/_pd_utils.py
ADDED
@@ -0,0 +1,83 @@
+import pandas as pd
+
+from pysdmx.errors import Invalid
+from pysdmx.model.concept import DataType
+from pysdmx.model.dataflow import Schema
+
+NUMERIC_TYPES = {
+    DataType.BIG_INTEGER,
+    DataType.COUNT,
+    DataType.DECIMAL,
+    DataType.DOUBLE,
+    DataType.FLOAT,
+    DataType.INCREMENTAL,
+    DataType.INTEGER,
+    DataType.LONG,
+    DataType.SHORT,
+}
+
+
+def _fill_na_values(data: pd.DataFrame, structure: Schema) -> pd.DataFrame:
+    """Fills missing values in the DataFrame based on the component type.
+
+    Numeric components are filled with "NaN".
+    Other components are filled with "#N/A".
+    If the structure does not have components,
+    all missing values are filled with "".
+
+    Args:
+        data: The DataFrame to fill.
+        structure: The structure definition (`Schema`).
+
+    Returns:
+        The DataFrame with filled missing values.
+
+    Raises:
+        Invalid: If the structure does not have components.
+    """
+    for component in structure.components:
+        if component.id in data.columns:
+            if component.dtype in NUMERIC_TYPES:
+                data[component.id] = (
+                    data[component.id].astype(object).fillna("NaN")
+                )
+            else:
+                data[component.id] = (
+                    data[component.id].astype(object).fillna("#N/A")
+                )
+
+    return data
+
+
+def _validate_explicit_null_values(
+    data: pd.DataFrame, structure: Schema
+) -> None:
+    """Validates that explicit null values are correct for the component type.
+
+    Numeric components must not contain "#N/A".
+    Non-numeric components must not contain "NaN".
+
+    Args:
+        data: The DataFrame to validate.
+        structure: The structure definition (`Schema`).
+
+    Raises:
+        Invalid: If invalid null values are found.
+    """
+    for component in structure.components:
+        if component.id in data.columns:
+            series = data[component.id].astype(str)
+            if component.dtype in NUMERIC_TYPES:
+                # Numeric: #N/A is invalid
+                if series.isin(["#N/A"]).any():
+                    raise Invalid(
+                        f"Invalid null value '#N/A' in numeric component "
+                        f"'{component.id}'."
+                    )
+            else:
+                # Non-numeric: NaN is invalid
+                if series.isin(["NaN"]).any():
+                    raise Invalid(
+                        f"Invalid null value 'NaN' in non-numeric component "
+                        f"'{component.id}'."
+                    )
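The new helpers make missing-value handling explicit: numeric components get the literal string "NaN", all other components get "#N/A". A minimal sketch of the semantics with plain pandas (the column names are hypothetical, not tied to a real Schema):

import pandas as pd

df = pd.DataFrame({"OBS_VALUE": [1.5, None], "FREQ": ["A", None]})

# Numeric component: missing values become the literal string "NaN".
df["OBS_VALUE"] = df["OBS_VALUE"].astype(object).fillna("NaN")
# Non-numeric component: missing values become "#N/A".
df["FREQ"] = df["FREQ"].astype(object).fillna("#N/A")

print(df.to_dict(orient="list"))
# {'OBS_VALUE': [1.5, 'NaN'], 'FREQ': ['A', '#N/A']}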
pysdmx/io/csv/__csv_aux_writer.py
CHANGED
@@ -3,6 +3,8 @@ from typing import List, Literal, Optional, Sequence
 
 import pandas as pd
 
+from pysdmx.errors import Invalid
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.model import Schema
 from pysdmx.model.dataset import ActionType
@@ -16,6 +18,25 @@ SDMX_CSV_ACTION_MAPPER = {
 }
 
 
+def _validate_schema_exists(dataset: PandasDataset) -> Schema:
+    """Validates that the dataset has a Schema defined.
+
+    Args:
+        dataset: The dataset to validate.
+
+    Returns:
+        The `Schema` from the dataset.
+
+    Raises:
+        Invalid: If the structure is not a `Schema`.
+    """
+    if not isinstance(dataset.structure, Schema):
+        raise Invalid(
+            "Dataset Structure is not a Schema. Cannot perform operation."
+        )
+    return dataset.structure
+
+
 def __write_time_period(df: pd.DataFrame, time_format: str) -> None:
     # TODO: Correct handle of normalized time format
     raise NotImplementedError("Normalized time format is not implemented yet.")
@@ -70,8 +91,10 @@ def _write_csv_2_aux(
 ) -> List[pd.DataFrame]:
     dataframes = []
     for dataset in datasets:
+        schema = _validate_schema_exists(dataset)
         # Create a copy of the dataset
         df: pd.DataFrame = copy(dataset.data)
+        df = _fill_na_values(df, schema)
        structure_ref, unique_id = dataset.short_urn.split("=", maxsplit=1)
 
         # Add additional attributes to the dataset
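_validate_schema_exists narrows dataset.structure to a Schema before the writers touch any components. A minimal standalone sketch of the same guard (the ensure_schema name is hypothetical):

from pysdmx.errors import Invalid
from pysdmx.model import Schema

def ensure_schema(structure: object) -> Schema:
    # Hypothetical stand-in for the private helper above.
    if not isinstance(structure, Schema):
        raise Invalid(
            "Dataset Structure is not a Schema. Cannot perform operation."
        )
    return structure  # statically narrowed to Schema from here on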
pysdmx/io/csv/sdmx10/reader/__init__.py
CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
     df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[
+        StringIO(input_str), keep_default_na=False, na_values=[]
     )
     # Drop empty columns
     df_csv = df_csv.dropna(axis=1, how="all")
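With keep_default_na=False and an empty na_values list, pandas stops converting marker strings such as "NaN" and "#N/A" into real missing values, so the explicit nulls emitted by _fill_na_values survive a read. A small illustration with toy CSV content:

from io import StringIO

import pandas as pd

csv = "FREQ,OBS_VALUE\nA,NaN\nM,#N/A\n"
df = pd.read_csv(StringIO(csv), keep_default_na=False, na_values=[])
print(df["OBS_VALUE"].tolist())  # ['NaN', '#N/A'] - kept as plain strings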
pysdmx/io/csv/sdmx10/writer/__init__.py
CHANGED
@@ -6,9 +6,12 @@ from typing import Literal, Optional, Sequence, Union
 
 import pandas as pd
 
-from pysdmx.io.
+from pysdmx.io._pd_utils import _fill_na_values
+from pysdmx.io.csv.__csv_aux_writer import (
+    __write_time_period,
+    _validate_schema_exists,
+)
 from pysdmx.io.pd import PandasDataset
-from pysdmx.model import Schema
 from pysdmx.toolkit.pd._data_utils import format_labels
 
 
@@ -44,22 +47,26 @@ def write(
     # Create a copy of the dataset
     dataframes = []
     for dataset in datasets:
+        # Validate that dataset has a proper Schema
+        schema = _validate_schema_exists(dataset)
+
         df: pd.DataFrame = copy(dataset.data)
 
+        # Fill missing values
+        df = _fill_na_values(df, schema)
+
         # Add additional attributes to the dataset
         for k, v in dataset.attributes.items():
             df[k] = v
         structure_id = dataset.short_urn.split("=")[1]
         if time_format is not None and time_format != "original":
             __write_time_period(df, time_format)
-        if labels is not None
-            format_labels(df, labels,
+        if labels is not None:
+            format_labels(df, labels, schema.components)
             if labels == "id":
                 df.insert(0, "DATAFLOW", structure_id)
             else:
-                df.insert(
-                    0, "DATAFLOW", f"{structure_id}:{dataset.structure.name}"
-                )
+                df.insert(0, "DATAFLOW", f"{structure_id}:{schema.name}")
         else:
             df.insert(0, "DATAFLOW", structure_id)
 
@@ -68,8 +75,7 @@ def write(
     # Concatenate the dataframes
     all_data = pd.concat(dataframes, ignore_index=True, axis=0)
 
-
-    all_data = all_data.astype(str).replace({"nan": "", "<NA>": ""})
+    all_data = all_data.astype(str)
     # If the output path is an empty string we use None
     output_path = (
         None
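The writer previously stringified the concatenated frame and blanked every "nan"/"<NA>", which would also have erased the new explicit null markers. A short pandas-only sketch of the behavior that was dropped (toy data):

import pandas as pd

s = pd.Series([1.0, None]).astype(str)    # ['1.0', 'nan']
old = s.replace({"nan": "", "<NA>": ""})  # ['1.0', ''] - the marker is lost
# With _fill_na_values applied per component earlier in write(), missing
# entries are already 'NaN' or '#N/A', so there is nothing left to blank.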
pysdmx/io/csv/sdmx20/reader/__init__.py
CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
     df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[
+        StringIO(input_str), keep_default_na=False, na_values=[]
     )
     # Drop empty columns
     df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/csv/sdmx21/reader/__init__.py
CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
     df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[
+        StringIO(input_str), keep_default_na=False, na_values=[]
     )
     # Drop empty columns
     df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/xml/__write_data_aux.py
CHANGED
@@ -42,20 +42,31 @@ def check_dimension_at_observation(
     return dimension_at_observation
 
 
-def writing_validation(dataset: PandasDataset) -> None:
-    """Structural validation of the dataset.
+def writing_validation(dataset: PandasDataset) -> Schema:
+    """Structural validation of the dataset.
+
+    Args:
+        dataset: The dataset to validate.
+
+    Returns:
+        The `Schema` from the dataset.
+
+    Raises:
+        Invalid: If the structure is not a `Schema` or validation fails.
+    """
     if not isinstance(dataset.structure, Schema):
         raise Invalid(
             "Dataset Structure is not a Schema. Cannot perform operation."
         )
+    schema = dataset.structure
     required_components = [
         comp.id
-        for comp in
+        for comp in schema.components
         if comp.role in (Role.DIMENSION, Role.MEASURE)
     ]
     required_components.extend(
         att.id
-        for att in
+        for att in schema.components.attributes
         if (
             att.required
             and att.attachment_level is not None
@@ -64,7 +75,7 @@ def writing_validation(dataset: PandasDataset) -> None:
     )
     non_required = [
         comp.id
-        for comp in
+        for comp in schema.components
         if comp.id not in required_components
     ]
     # Columns match components
@@ -80,9 +91,11 @@ def writing_validation(dataset: PandasDataset) -> None:
             f"Difference: {', '.join(difference)}"
         )
     # Check if the dataset has at least one dimension and one measure
-    if not
+    if not schema.components.dimensions:
         raise Invalid(
             "The dataset structure must have at least one dimension."
         )
-    if not
+    if not schema.components.measures:
         raise Invalid("The dataset structure must have at least one measure.")
+
+    return schema
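writing_validation now hands the narrowed Schema back to its callers. The column check it performs boils down to a set comparison; a minimal sketch with hypothetical component ids (the real code derives the sets from schema.components and raises pysdmx.errors.Invalid):

required = {"FREQ", "TIME_PERIOD", "OBS_VALUE"}  # dimensions + measures
optional = {"OBS_STATUS"}                        # non-required attributes
columns = {"FREQ", "TIME_PERIOD", "OBS_VALUE", "UNIT"}

# Columns must cover every required component and add nothing unknown.
difference = (columns - required - optional) | (required - columns)
if difference:
    raise ValueError(f"Difference: {', '.join(sorted(difference))}")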
pysdmx/io/xml/__write_structure_specific_aux.py
CHANGED
@@ -1,10 +1,11 @@
 # mypy: disable-error-code="union-attr"
 """Module for writing SDMX-ML 3.0 Structure Specific auxiliary functions."""
 
-from typing import Any, Dict, List
+from typing import Any, Dict, Hashable, List
 
 import pandas as pd
 
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
     ABBR_MSG,
@@ -69,9 +70,6 @@ def __write_data_structure_specific(
     outfile = ""
 
     for i, (short_urn, dataset) in enumerate(datasets.items()):
-        dataset.data = dataset.data.astype(str).replace(
-            {"nan": "", "<NA>": ""}
-        )
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -115,8 +113,12 @@ def __write_data_single_dataset(
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
     sdmx_type = parse_short_urn(structure_urn).id
+
+    # Validate structure before writing
+    schema = writing_validation(dataset)
+
     # Remove nan values from DataFrame
-    dataset.data = dataset.data
+    dataset.data = _fill_na_values(dataset.data, schema)
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -139,7 +141,6 @@ def __write_data_single_dataset(
     if dim == ALL_DIM:
         data += __memory_optimization_writing(dataset, prettyprint)
     else:
-        writing_validation(dataset)
         series_codes, obs_codes, group_codes = get_codes(
             dimension_code=dim,
             structure=dataset.structure,  # type: ignore[arg-type]
@@ -230,69 +231,85 @@ def __obs_processing(data: pd.DataFrame, prettyprint: bool = True) -> str:
     return "".join(iterator)
 
 
-def
-
-    series_codes: List[str],
-    obs_codes: List[str],
-    prettyprint: bool = True,
+def __format_ser_str(
+    data_info: Dict[Any, Any], prettyprint: bool = True
 ) -> str:
-
-
-
-
-            lambda x: __format_dict_ser(out_list, x)
-        )
-
-        return "".join(out_list)
-
-    def __format_dict_ser(
-        output_list: List[str],
-        obs: Any,
-    ) -> Any:
-        """Formats the series as key=value pairs."""
-        # Creating the observation dict,
-        # we always get the first element on Series
-        # as we are grouping by it
-        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
-        output_list.append(__format_ser_str(data_dict["Series"][0]))
-        # We remove the data for series as it is no longer necessary
-        del data_dict["Series"][0]
+    """Formats the series as key=value pairs."""
+    child2 = "\t\t" if prettyprint else ""
+    child3 = "\t\t\t" if prettyprint else ""
+    nl = "\n" if prettyprint else ""
 
-
-        """Formats the series as key=value pairs."""
-        child2 = "\t\t" if prettyprint else ""
-        child3 = "\t\t\t" if prettyprint else ""
-        nl = "\n" if prettyprint else ""
+    out_element = f"{child2}<Series "
 
-
+    for k, v in data_info.items():
+        if k != "Obs":
+            out_element += f"{k}={__escape_xml(str(v))!r} "
 
-
-            if k != "Obs":
-                out_element += f"{k}={__escape_xml(str(v))!r} "
+    out_element += f">{nl}"
 
-
+    for obs in data_info["Obs"]:
+        out_element += f"{child3}<Obs "
 
-        for
-            out_element += f"{
+        for k, v in obs.items():
+            out_element += f"{k}={__escape_xml(str(v))!r} "
 
-
-            out_element += f"{k}={__escape_xml(str(v))!r} "
+        out_element += f"/>{nl}"
 
-
+    out_element += f"{child2}</Series>{nl}"
 
-
+    return out_element
 
-        return out_element
 
-
-    data
-
+def __build_series_dict(
+    data: pd.DataFrame, series_codes: List[str]
+) -> Dict[str, List[Dict[Hashable, Any]]]:
+    """Build series dictionary from data."""
+    if not series_codes:
+        return {"Series": [{}] if not data.empty else []}
+    return {
         "Series": data[series_codes]
         .drop_duplicates()
         .reset_index(drop=True)
         .to_dict(orient="records")
     }
 
-    out = __generate_series_str()
 
-
+def __process_series_observations(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    data_dict: Dict[str, List[Dict[Hashable, Any]]],
+    prettyprint: bool = True,
+) -> str:
+    """Process series and their observations into XML string."""
+    out_list: List[str] = []
+
+    def append_series_with_obs(obs: Any) -> str:
+        """Append series with observations to output list."""
+        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
+        result = __format_ser_str(data_dict["Series"][0], prettyprint)
+        out_list.append(result)
+        del data_dict["Series"][0]
+        return result
+
+    if not series_codes:
+        if not data.empty:
+            append_series_with_obs(data[obs_codes])
+    else:
+        data.groupby(by=series_codes)[obs_codes].apply(append_series_with_obs)
+
+    return "".join(out_list)
+
+
+def __series_processing(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    prettyprint: bool = True,
+) -> str:
+    """Write series to SDMX-ML Structure-Specific format."""
+    data = data.sort_values(series_codes, axis=0)
+    data_dict = __build_series_dict(data, series_codes)
+    return __process_series_observations(
        data, series_codes, obs_codes, data_dict, prettyprint
+    )
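The refactor separates building the series keys from emitting their observations. The underlying idea - deduplicate the series columns, then group the observation columns by those keys - fits in a few lines of plain pandas (hypothetical column names):

import pandas as pd

data = pd.DataFrame(
    {"FREQ": ["A", "A", "M"], "TIME_PERIOD": [2020, 2021, 2020], "OBS_VALUE": [1, 2, 3]}
)
series_codes, obs_codes = ["FREQ"], ["TIME_PERIOD", "OBS_VALUE"]

series = (
    data[series_codes].drop_duplicates().reset_index(drop=True).to_dict(orient="records")
)
# [{'FREQ': 'A'}, {'FREQ': 'M'}]

for _, obs in data.groupby(by=series_codes)[obs_codes]:
    print(obs.to_dict(orient="records"))
# [{'TIME_PERIOD': 2020, 'OBS_VALUE': 1}, {'TIME_PERIOD': 2021, 'OBS_VALUE': 2}]
# [{'TIME_PERIOD': 2020, 'OBS_VALUE': 3}]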
pysdmx/io/xml/sdmx21/writer/generic.py
CHANGED
@@ -2,10 +2,11 @@
 """Module for writing SDMX-ML 2.1 Generic data messages."""
 
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, Hashable, List, Optional, Sequence, Tuple, Union
 
 import pandas as pd
 
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.format import Format
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
@@ -122,7 +123,6 @@ def __write_data_generic(
 
     for short_urn, dataset in datasets.items():
         writing_validation(dataset)
-        dataset.data = dataset.data.fillna("").astype(str)
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -160,7 +160,8 @@ def __write_data_single_dataset(
     outfile = ""
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
-
+    schema = writing_validation(dataset)
+    dataset.data = _fill_na_values(dataset.data, schema)
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -347,9 +348,14 @@ def __series_processing(
 ) -> str:
     def __generate_series_str() -> str:
         out_list: List[str] = []
-
-
-
+        group_cols = series_codes + series_att_codes
+        if not group_cols:
+            if not data.empty:
+                __format_dict_ser(out_list, data)
+        else:
+            data.groupby(by=group_cols)[data.columns].apply(
+                lambda x: __format_dict_ser(out_list, x)
+            )
 
         return "".join(out_list)
 
@@ -359,13 +365,14 @@ def __series_processing(
     ) -> Any:
         obs_data = group_data[obs_codes + obs_att_codes].copy()
         data_dict["Series"][0]["Obs"] = obs_data.to_dict(orient="records")
-
-
-
-
-
-
-
+        if series_att_codes:
+            data_dict["Series"][0].update(
+                {
+                    k: v
+                    for k, v in group_data[series_att_codes].iloc[0].items()
+                    if k in series_att_codes
+                }
+            )
         output_list.append(
             __format_ser_str(
                 data_info=data_dict["Series"][0],
@@ -380,12 +387,17 @@ def __series_processing(
 
     # Getting each datapoint from data and creating dict
     data = data.sort_values(series_codes, axis=0)
-
-
-
-
-
-
+    if not series_codes:
+        data_dict: Dict[str, List[Dict[Hashable, Any]]] = {
+            "Series": [{}] if not data.empty else []
+        }
+    else:
+        data_dict = {
+            "Series": data[series_codes]
+            .drop_duplicates()
+            .reset_index(drop=True)
+            .to_dict(orient="records")
+        }
 
     out = __generate_series_str()
 
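The new empty-key branches guard the degenerate case where no series dimensions remain: pandas raises on an empty group key. A minimal sketch of the assumed behavior (toy frame):

import pandas as pd

data = pd.DataFrame({"OBS_VALUE": [1, 2]})
group_cols: list = []

# data.groupby(by=[]) raises ValueError("No group keys passed!"),
# hence the explicit branch instead of grouping:
if not group_cols:
    series = [{}] if not data.empty else []  # a single, key-less series
else:
    series = data[group_cols].drop_duplicates().to_dict(orient="records")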
pysdmx/model/concept.py
CHANGED
@@ -35,6 +35,8 @@ class DataType(str, Enum):
     """True or False."""
     COUNT = "Count"
     """A simple incrementing integer type."""
+    DATA_SET_REFERENCE = "DataSetReference"
+    """Reference to a data set."""
     DATE = "GregorianDay"
     """A ISO 8601 date (e.g. ``2011-06-17``)."""
     DATE_TIME = "DateTime"
@@ -47,12 +49,24 @@ class DataType(str, Enum):
     """A decimal number (8 bytes)."""
     DURATION = "Duration"
     """An ISO 8601 duration."""
+    EXCLUSIVE_VALUE_RANGE = "ExclusiveValueRange"
+    """A range of values excluding boundaries."""
     FLOAT = "Float"
     """A decimal number (4 bytes)."""
+    GEOSPATIAL_INFORMATION = "GeospatialInformation"
+    """Geospatial data format."""
     GREGORIAN_TIME_PERIOD = "GregorianTimePeriod"
     """This is the union of YEAR, YEAR_MONTH, and DATE."""
+    IDENTIFIABLE_REFERENCE = "IdentifiableReference"
+    """Reference to an identifiable object."""
+    INCLUSIVE_VALUE_RANGE = "InclusiveValueRange"
+    """A range of values including boundaries."""
+    INCREMENTAL = "Incremental"
+    """An integer type that increases sequentially."""
     INTEGER = "Integer"
     """A whole number (4 bytes)."""
+    KEY_VALUES = "KeyValues"
+    """Key values reference."""
     LONG = "Long"
     """A whole number (8 bytes)."""
     MONTH = "Month"
@@ -85,6 +99,8 @@ class DataType(str, Enum):
     """A string (as immutable sequence of Unicode code points)."""
     TIME = "Time"
     """An ISO 8601 time (e.g. ``12:50:42``)."""
+    TIMES_RANGE = "TimesRange"
+    """A range of time periods."""
     URI = "URI"
     """A uniform resource identifier, such as a URL."""
     XHTML = "XHTML"
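Because DataType subclasses str, the new members compare directly against their SDMX value strings, and INCREMENTAL also joins NUMERIC_TYPES in _pd_utils, so incremental components receive the numeric "NaN" marker. A quick sketch against 1.10.0rc2:

from pysdmx.model.concept import DataType

assert DataType.INCREMENTAL == "Incremental"
assert DataType("TimesRange") is DataType.TIMES_RANGE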
{pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 pysdmx/__extras_check.py,sha256=Tmluui2OuJVyJB6a1Jl0PlrRjpsswhtCjAqtRLOSero,2059
-pysdmx/__init__.py,sha256=
+pysdmx/__init__.py,sha256=Q03oQ7Jwt3q1RWCdX2lHnJ8s7cd-qe2_l47m90Dqdgo,71
 pysdmx/api/__init__.py,sha256=8lRaF6kEO51ehl0fmW_pHLvkN_34TtEhqhr3oKo6E6g,26
 pysdmx/api/dc/__init__.py,sha256=oPU32X8CRZy4T1to9mO5KMqMwxQsVI424dPqai-I8zI,121
 pysdmx/api/dc/_api.py,sha256=poy1FYFXnF6maBGy5lpOodf32-7QQjH8PCBNDkuOXxQ,7747
@@ -25,18 +25,19 @@ pysdmx/api/qb/structure.py,sha256=0m_Fmp410Rfjdv0ehLS8ivwccwn-xfBkcIdYayu8pgg,17
 pysdmx/api/qb/util.py,sha256=at2Sb2kVltSTDc1gKiqG6HtIFhjSx-Msbe--wCvRbQI,3667
 pysdmx/errors.py,sha256=9bfujYykzfGMa1TuUOmH9QqghnZGOo556fvbKH2jFa8,3295
 pysdmx/io/__init__.py,sha256=96ZCY1PfcWp_q2Nlo2tHJAK31sH_b05v9UkbR0vGdg0,180
+pysdmx/io/_pd_utils.py,sha256=NgGhjn9clB0xD_3hsOzwXci8ix9dLe2Lt2DAZ9Tkyzw,2555
 pysdmx/io/csv/__csv_aux_reader.py,sha256=2RGzhga_VDnh2OVX_Bo-rR2hgAQxHXa7zt7-D5MVBu0,3994
-pysdmx/io/csv/__csv_aux_writer.py,sha256=
+pysdmx/io/csv/__csv_aux_writer.py,sha256=4PlnMuzv_XUoJnZCS8GjqzTpjsSf79bmD9KTmTH24KE,4779
 pysdmx/io/csv/__init__.py,sha256=53f2rPkwILigOqArgRQOOwLk-ha6zVTe4EypIsR7K6I,107
 pysdmx/io/csv/sdmx10/__init__.py,sha256=NAAm_yodK-gzkuzewGQeYpF3f5nZmDA4vWGfT2KGTWc,38
-pysdmx/io/csv/sdmx10/reader/__init__.py,sha256=
-pysdmx/io/csv/sdmx10/writer/__init__.py,sha256=
+pysdmx/io/csv/sdmx10/reader/__init__.py,sha256=l7nK6Q-VpOhd_VFYNxvd05b8GxyNQXpI7aBPQYvy4LM,2733
+pysdmx/io/csv/sdmx10/writer/__init__.py,sha256=_6u3ANN84JW1wb-9YpgzKSXcpMdUBRmDtFK2fVG_r7I,2959
 pysdmx/io/csv/sdmx20/__init__.py,sha256=6_YCb4iuUWJRS9y0KSdf4ebNKblSlnTTzNC5c19kNk8,38
-pysdmx/io/csv/sdmx20/reader/__init__.py,sha256=
-pysdmx/io/csv/sdmx20/writer/__init__.py,sha256=
+pysdmx/io/csv/sdmx20/reader/__init__.py,sha256=DJMLkE4YKlBCxK4R38R3JXkd3pfiYtNa9HB8PIim0cQ,2844
+pysdmx/io/csv/sdmx20/writer/__init__.py,sha256=vaeaRT1qtAl3dkkzL2EeCrkpLmeL_r3ivqqakVDxAn0,2456
 pysdmx/io/csv/sdmx21/__init__.py,sha256=I3_dwi4A4if62_mwEjqbOa-F7mhoIMf0D6szpDf3W7c,38
-pysdmx/io/csv/sdmx21/reader/__init__.py,sha256=
-pysdmx/io/csv/sdmx21/writer/__init__.py,sha256=
+pysdmx/io/csv/sdmx21/reader/__init__.py,sha256=hoXkOJM8prZZ6QESG3ZjReN2P-8pGryN6CPeEYtrqjw,2864
+pysdmx/io/csv/sdmx21/writer/__init__.py,sha256=B-0ZYFxUm1cmv0-rwDaE4TmIE0fnAeojNPIXXP2ATXQ,2451
 pysdmx/io/format.py,sha256=EO-PyYpiU0WswvEGA5UHokladxPezcwBUo1AJTqxp1Q,5250
 pysdmx/io/input_processor.py,sha256=P1_jKegrOyV7EaZLjLrq8fX2u1EI7gPBJoKvlBCNkP0,6967
 pysdmx/io/json/fusion/messages/__init__.py,sha256=ac2jWfjGGBcfoSutiKy68LzqwNp_clt2RzmJOaYCxL0,2142
@@ -84,7 +85,7 @@ pysdmx/io/json/sdmxjson2/messages/schema.py,sha256=JwFYjgvhK_1NN5KQIUYNb0ul4ywQh
 pysdmx/io/json/sdmxjson2/messages/structure.py,sha256=c0dyTJK49UpGvL1iLFaFLYFkT89kzvXwk65qd_j-Y1U,12738
 pysdmx/io/json/sdmxjson2/messages/vtl.py,sha256=C-JQY1_W8SrJd2lLdUGCmQO9Br1pdqdT8WmB1K4e_yY,35284
 pysdmx/io/json/sdmxjson2/reader/__init__.py,sha256=RbNnZSrGQa4OE0HBWJau9tPFSQbDklcKZaBWOzxEw4I,1629
-pysdmx/io/json/sdmxjson2/reader/doc_validation.py,sha256=
+pysdmx/io/json/sdmxjson2/reader/doc_validation.py,sha256=PsY_VEJOuEtXj7pRgabiEbWBSWjTlK2oN-ayU0XIXC4,3680
 pysdmx/io/json/sdmxjson2/reader/metadata.py,sha256=FT9CEWjrVfUK4vTEqs-f2ZO6jWeRRkEHCjsHMNgKQp0,1230
 pysdmx/io/json/sdmxjson2/reader/structure.py,sha256=PKHvH_lY2XJtKkg5rGbGSHyYu_raGLrBuaEy8BKr6U0,1209
 pysdmx/io/json/sdmxjson2/writer/__init__.py,sha256=DZGkas1ghei4p6SZsIQI1LPToS-d8F1Nx75MC8reT7g,270
@@ -103,8 +104,8 @@ pysdmx/io/xml/__structure_aux_reader.py,sha256=50UPOCk2XnCU4J1hQNAXiGL2n8QPXdf4z
 pysdmx/io/xml/__structure_aux_writer.py,sha256=0i08hvFw2TfRElaGAeTwfsOaOpw8XWBlZ_zWdxmLPkM,45612
 pysdmx/io/xml/__tokens.py,sha256=M0x-tgoh6_pzL_BP-MArCu3w0NO-AUS6bR-W6BIEJG8,6891
 pysdmx/io/xml/__write_aux.py,sha256=c3HgDMey8nBXyeT_yU8PWdk25bgYyX49R21fLv8CgZc,15534
-pysdmx/io/xml/__write_data_aux.py,sha256=
-pysdmx/io/xml/__write_structure_specific_aux.py,sha256=
+pysdmx/io/xml/__write_data_aux.py,sha256=ebuqtz97wa7scNM7cO0A_Cr40TXmRm3qqYbOjALj6wY,3547
+pysdmx/io/xml/__write_structure_specific_aux.py,sha256=zNep8HYFYnBYjOKZZa7PWRhc60rnRMmnfJh39QMaPtg,9292
 pysdmx/io/xml/config.py,sha256=R24cczVkzkhjVLXpv-qfEm88W3_QTqVt2Qofi8IvJ5Y,93
 pysdmx/io/xml/doc_validation.py,sha256=WXDhte96VEAeZMMHJ0Y68WW8HEoOhEiOYEnbGP5Zwjw,1795
 pysdmx/io/xml/header.py,sha256=My03uhWD3AkfTwfUqiblmLIZuqd7uvIEYsOial6TClg,5971
@@ -117,7 +118,7 @@ pysdmx/io/xml/sdmx21/reader/structure_specific.py,sha256=S3-gLmaBFjBRIr25qQtlrao
 pysdmx/io/xml/sdmx21/reader/submission.py,sha256=8daiBW-sIVGaB6lYwHqJNkLI7IixMSydCK-0ZO8ri4I,1711
 pysdmx/io/xml/sdmx21/writer/__init__.py,sha256=QQGFAss26njCC4eKYxhBcI9LYm5NHuJaAJGKCrIrL80,31
 pysdmx/io/xml/sdmx21/writer/error.py,sha256=0wkX7K_n2oZNkOKg_zpl9Id82qP72Lqof-T-ZLGoZ1M,353
-pysdmx/io/xml/sdmx21/writer/generic.py,sha256=
+pysdmx/io/xml/sdmx21/writer/generic.py,sha256=_ouKoVndG7Jj8_EAvUXPK1RbDKdKemh0kSRaDPFjTHo,16214
 pysdmx/io/xml/sdmx21/writer/structure.py,sha256=S3qoNgXxrakn2V4NLdL5U5mAA16XisI0PpJDuxqalFE,2084
 pysdmx/io/xml/sdmx21/writer/structure_specific.py,sha256=iXc1J-RzoKyRznvgGgdTSeUfyqZLouI8CtSq2YhGBWI,2877
 pysdmx/io/xml/sdmx30/__init__.py,sha256=8BScJFEgLy8DoUreu2RBUtxjGjKyClkKBI_Qtarbk-Y,38
@@ -139,7 +140,7 @@ pysdmx/model/__base.py,sha256=M1O-uT8RqeKZpGT09HD6ifjPl0F9ORxlRYra3fn8qCM,15182
 pysdmx/model/__init__.py,sha256=UPZtum_PF-nPPQa21Bq1doUXLCoU-yRGPh45ZXdUI9k,5493
 pysdmx/model/category.py,sha256=ksYIOGPHgZI619RhmRXZUXHP_juY9im40tWzR2yuMEc,6796
 pysdmx/model/code.py,sha256=Wu6rEXeZf_XA0aBrDXgN-3yvySAHH7SAjrWliFlmC24,12799
-pysdmx/model/concept.py,sha256=
+pysdmx/model/concept.py,sha256=aEVUZVeS2NUzvQ0MZLjFT7iKRFHzhH6oC2vmH7kHLXI,10325
 pysdmx/model/constraint.py,sha256=MwI_GLKzwkuo0BzAsgcnDeB-b7bq8qqwHNte5JjCEFA,1960
 pysdmx/model/dataflow.py,sha256=9EzGn-EDm1OQa52N5ep8VApZoj7lHWfIs-W5tnBP9FY,23954
 pysdmx/model/dataset.py,sha256=Lbr7tYonGHD3NZUD-M9hK2puaEAluOVPG2DbkOohzMM,4861
@@ -163,7 +164,7 @@ pysdmx/util/__init__.py,sha256=m_XWRAmVJ7F6ai4Ckrj_YuPbhg3cJZAXeZrEThrL88k,3997
 pysdmx/util/_date_pattern_map.py,sha256=IS1qONwVHbTBNIFCT0Rqbijj2a9mYvs7onXSK6GeQAQ,1620
 pysdmx/util/_model_utils.py,sha256=nQ1yWBt-tZYDios9xvRvJ7tHq6A8_RoGdY1wi7WGz2w,3793
 pysdmx/util/_net_utils.py,sha256=nOTz_x3FgFrwKh42_J70IqYXz9duQkMFJWtejZXcLHs,1326
-pysdmx-1.10.
-pysdmx-1.10.
-pysdmx-1.10.
-pysdmx-1.10.
+pysdmx-1.10.0rc2.dist-info/METADATA,sha256=ljfxmPVZvRL2pcDovZhz986lhAuMwF6mlwgm1qEoA14,4852
+pysdmx-1.10.0rc2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+pysdmx-1.10.0rc2.dist-info/licenses/LICENSE,sha256=3XTNDPtv2RxDUNkQzn9MIWit2u7_Ob5daMLEq-4pBJs,649
+pysdmx-1.10.0rc2.dist-info/RECORD,,
{pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/WHEEL
File without changes
{pysdmx-1.10.0.dist-info → pysdmx-1.10.0rc2.dist-info}/licenses/LICENSE
File without changes
|