pysdmx 1.10.0rc2__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- pysdmx/__init__.py +1 -1
- pysdmx/api/fmr/__init__.py +3 -2
- pysdmx/api/qb/data.py +2 -0
- pysdmx/io/csv/__csv_aux_writer.py +0 -23
- pysdmx/io/csv/sdmx10/reader/__init__.py +1 -1
- pysdmx/io/csv/sdmx10/writer/__init__.py +9 -15
- pysdmx/io/csv/sdmx20/reader/__init__.py +1 -1
- pysdmx/io/csv/sdmx20/writer/__init__.py +1 -1
- pysdmx/io/csv/sdmx21/reader/__init__.py +1 -1
- pysdmx/io/csv/sdmx21/writer/__init__.py +1 -1
- pysdmx/io/json/fusion/messages/dsd.py +8 -2
- pysdmx/io/json/sdmxjson2/messages/dsd.py +35 -14
- pysdmx/io/json/sdmxjson2/reader/doc_validation.py +4 -0
- pysdmx/io/xml/__write_data_aux.py +9 -20
- pysdmx/io/xml/__write_structure_specific_aux.py +54 -71
- pysdmx/io/xml/sdmx21/writer/generic.py +19 -31
- pysdmx/model/concept.py +0 -16
- pysdmx/model/dataflow.py +8 -5
- {pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/METADATA +1 -1
- {pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/RECORD +22 -23
- pysdmx/io/_pd_utils.py +0 -83
- {pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/WHEEL +0 -0
- {pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/licenses/LICENSE +0 -0
pysdmx/__init__.py
CHANGED
pysdmx/api/fmr/__init__.py
CHANGED
@@ -39,6 +39,7 @@ from pysdmx.model import (
     Dataflow,
     DataflowInfo,
     DataProvider,
+    DataStructureDefinition,
     Hierarchy,
     HierarchyAssociation,
     Metadataflow,
@@ -679,7 +680,7 @@ class RegistryClient(__BaseRegistryClient):
         agency: str = "*",
         id: str = "*",
         version: str = "+",
-    ) -> Sequence[
+    ) -> Sequence[DataStructureDefinition]:
         """Get the data structures(s) matching the supplied parameters.
 
         Args:
@@ -1253,7 +1254,7 @@ class AsyncRegistryClient(__BaseRegistryClient):
         agency: str = "*",
         id: str = "*",
         version: str = "+",
-    ) -> Sequence[
+    ) -> Sequence[DataStructureDefinition]:
         """Get the data structures(s) matching the supplied parameters.
 
         Args:
pysdmx/api/qb/data.py
CHANGED
@@ -329,6 +329,8 @@ class DataQuery(_CoreDataQuery, frozen=True, omit_defaults=True):
 
     def __get_short_v2_qs(self, api_version: ApiVersion) -> str:
         qs = ""
+        if self.components:
+            qs += self._create_component_filters(self.components)
         if self.updated_after:
             qs = super()._append_qs_param(
                 qs,
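For context, the added lines mean component filters are no longer silently dropped when the short SDMX-API v2 query string is built: they are now emitted before the `updated_after` parameter. A minimal sketch of the bug class, using hypothetical names (`build_qs`, `filters`) rather than pysdmx's actual internals:

    from typing import Optional

    def build_qs(filters: dict, updated_after: Optional[str]) -> str:
        qs = ""
        # The rc2 code went straight to updated_after, so filters were lost;
        # 1.10.1 adds the equivalent of this step first.
        qs += "&".join(f"c[{k}]={v}" for k, v in filters.items())
        if updated_after:
            qs += ("&" if qs else "") + f"updatedAfter={updated_after}"
        return qs

    print(build_qs({"FREQ": "A"}, "2024-01-01T00:00:00+00:00"))
    # c[FREQ]=A&updatedAfter=2024-01-01T00:00:00+00:00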
pysdmx/io/csv/__csv_aux_writer.py
CHANGED
@@ -3,8 +3,6 @@ from typing import List, Literal, Optional, Sequence
 
 import pandas as pd
 
-from pysdmx.errors import Invalid
-from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.model import Schema
 from pysdmx.model.dataset import ActionType
@@ -18,25 +16,6 @@ SDMX_CSV_ACTION_MAPPER = {
 }
 
 
-def _validate_schema_exists(dataset: PandasDataset) -> Schema:
-    """Validates that the dataset has a Schema defined.
-
-    Args:
-        dataset: The dataset to validate.
-
-    Returns:
-        The `Schema` from the dataset.
-
-    Raises:
-        Invalid: If the structure is not a `Schema`.
-    """
-    if not isinstance(dataset.structure, Schema):
-        raise Invalid(
-            "Dataset Structure is not a Schema. Cannot perform operation."
-        )
-    return dataset.structure
-
-
 def __write_time_period(df: pd.DataFrame, time_format: str) -> None:
     # TODO: Correct handle of normalized time format
     raise NotImplementedError("Normalized time format is not implemented yet.")
@@ -91,10 +70,8 @@ def _write_csv_2_aux(
 ) -> List[pd.DataFrame]:
     dataframes = []
     for dataset in datasets:
-        schema = _validate_schema_exists(dataset)
         # Create a copy of the dataset
         df: pd.DataFrame = copy(dataset.data)
-        df = _fill_na_values(df, schema)
         structure_ref, unique_id = dataset.short_urn.split("=", maxsplit=1)
 
         # Add additional attributes to the dataset
pysdmx/io/csv/sdmx10/reader/__init__.py
CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
    df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[]
+        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop empty columns
    df_csv = df_csv.dropna(axis=1, how="all")
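Context for this one-line change (repeated in all three CSV readers): with `keep_default_na=False`, the `na_values` argument is the complete list of tokens pandas parses as missing. `na_values=[]` therefore disabled missing-value detection entirely, while `na_values=[""]` restores it for empty cells only, without resurrecting pandas defaults such as "NA" or "null". A standalone illustration:

    from io import StringIO

    import pandas as pd

    csv = "OBS_VALUE,COMMENT\n1.5,NA\n,text\n"

    # na_values=[]: nothing is parsed as missing; empty cells stay "".
    old = pd.read_csv(StringIO(csv), keep_default_na=False, na_values=[])
    print(old["OBS_VALUE"].tolist())  # ['1.5', ''] -> object/str column

    # na_values=[""]: only empty cells become NaN; "NA" stays a literal string.
    new = pd.read_csv(StringIO(csv), keep_default_na=False, na_values=[""])
    print(new["OBS_VALUE"].tolist())  # [1.5, nan] -> numeric column with NaN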
pysdmx/io/csv/sdmx10/writer/__init__.py
CHANGED
@@ -6,12 +6,9 @@ from typing import Literal, Optional, Sequence, Union
 
 import pandas as pd
 
-from pysdmx.io._pd_utils import _fill_na_values
-from pysdmx.io.csv.__csv_aux_writer import (
-    __write_time_period,
-    _validate_schema_exists,
-)
+from pysdmx.io.csv.__csv_aux_writer import __write_time_period
 from pysdmx.io.pd import PandasDataset
+from pysdmx.model import Schema
 from pysdmx.toolkit.pd._data_utils import format_labels
 
 
@@ -47,26 +44,22 @@ def write(
     # Create a copy of the dataset
     dataframes = []
     for dataset in datasets:
-        # Validate that dataset has a proper Schema
-        schema = _validate_schema_exists(dataset)
-
         df: pd.DataFrame = copy(dataset.data)
 
-        # Fill missing values
-        df = _fill_na_values(df, schema)
-
         # Add additional attributes to the dataset
         for k, v in dataset.attributes.items():
             df[k] = v
         structure_id = dataset.short_urn.split("=")[1]
         if time_format is not None and time_format != "original":
             __write_time_period(df, time_format)
-        if labels is not None:
-            format_labels(df, labels, schema.components)
+        if labels is not None and isinstance(dataset.structure, Schema):
+            format_labels(df, labels, dataset.structure.components)
             if labels == "id":
                 df.insert(0, "DATAFLOW", structure_id)
             else:
-                df.insert(
+                df.insert(
+                    0, "DATAFLOW", f"{structure_id}:{dataset.structure.name}"
+                )
         else:
             df.insert(0, "DATAFLOW", structure_id)
 
@@ -75,7 +68,8 @@ def write(
     # Concatenate the dataframes
     all_data = pd.concat(dataframes, ignore_index=True, axis=0)
 
-
+    # Ensure null values are represented as empty strings
+    all_data = all_data.astype(str).replace({"nan": "", "<NA>": ""})
     # If the output path is an empty string we use None
     output_path = (
         None
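A note on the new null handling: `astype(str)` turns float NaN into the literal string "nan" and pandas NA into "<NA>", which is exactly what the added `replace` maps back to empty strings. A standalone sketch:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {
            "OBS_VALUE": [1.0, np.nan],
            "CONF": pd.Series(["F", None], dtype="string"),
        }
    )

    as_str = df.astype(str)
    print(as_str["OBS_VALUE"].tolist())  # ['1.0', 'nan']
    print(as_str["CONF"].tolist())       # ['F', '<NA>']

    # Mapping both sentinels to "" yields empty CSV cells instead of "nan"/"<NA>".
    clean = as_str.replace({"nan": "", "<NA>": ""})
    print(clean["CONF"].tolist())        # ['F', '']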
pysdmx/io/csv/sdmx20/reader/__init__.py
CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
    df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[]
+        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop empty columns
    df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/csv/sdmx21/reader/__init__.py
CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
    df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[]
+        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop empty columns
    df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/json/fusion/messages/dsd.py
CHANGED
@@ -80,8 +80,14 @@ class FusionAttribute(Struct, frozen=True):
     measureReferences: Optional[Sequence[str]] = None
 
     def __derive_level(self, groups: Sequence[FusionGroup]) -> str:
-        if self.
-
+        if self.measureReferences:
+            if (
+                len(self.measureReferences) == 1
+                and self.measureReferences[0] == "OBS_VALUE"
+            ):
+                return "O"
+            else:
+                return ",".join(self.measureReferences)
         elif self.attachmentLevel == "DATA_SET":
             return "D"
         elif self.attachmentLevel == "GROUP":
pysdmx/io/json/sdmxjson2/messages/dsd.py
CHANGED
@@ -126,7 +126,10 @@ class JsonAttributeRelationship(Struct, frozen=True, omit_defaults=True):
     ) -> str:
         """Returns the attachment level."""
         if measures:
-
+            if len(measures) == 1 and measures[0] == "OBS_VALUE":
+                return "O"
+            else:
+                return ",".join(measures)
         elif self.dimensions:
             return ",".join(self.dimensions)
         elif self.group:
@@ -136,15 +139,17 @@ class JsonAttributeRelationship(Struct, frozen=True, omit_defaults=True):
         return "D"
 
     @classmethod
-    def from_model(
+    def from_model(
+        self, rel: str, has_measure_rel: bool = False
+    ) -> "JsonAttributeRelationship":
         """Converts a pysdmx attribute relationship to an SDMX-JSON one."""
         if rel == "D":
             return JsonAttributeRelationship(dataflow={})
-        elif rel == "O":
+        elif rel == "O" or has_measure_rel:
             return JsonAttributeRelationship(observation={})
         else:
-
-            return JsonAttributeRelationship(dimensions=
+            comps = rel.split(",")
+            return JsonAttributeRelationship(dimensions=comps)
 
 
 class JsonDimension(Struct, frozen=True, omit_defaults=True):
@@ -257,17 +262,29 @@ class JsonAttribute(Struct, frozen=True, omit_defaults=True):
     )
 
     @classmethod
-    def from_model(
+    def from_model(
+        self, attribute: Component, measures: Sequence[Component]
+    ) -> "JsonAttribute":
         """Converts a pysdmx attribute to an SDMX-JSON one."""
         concept = _get_concept_reference(attribute)
         usage = "mandatory" if attribute.required else "optional"
+        repr = _get_json_representation(attribute)
+
+        ids = attribute.attachment_level.split(",")  # type: ignore[union-attr]
+        comps = set(ids)
+        mids = {m.id for m in measures}
+        has_measure_rel = len(comps.intersection(mids)) > 0
         level = JsonAttributeRelationship.from_model(
-            attribute.attachment_level  # type: ignore[arg-type]
+            attribute.attachment_level,  # type: ignore[arg-type]
+            has_measure_rel,
         )
-
-
-
-
+
+        if attribute.attachment_level == "O":
+            mr = ["OBS_VALUE"]
+        elif has_measure_rel:
+            mr = ids
+        else:
+            mr = None
 
         return JsonAttribute(
             id=attribute.id,
@@ -356,12 +373,14 @@ class JsonAttributes(Struct, frozen=True, omit_defaults=True):
 
     @classmethod
     def from_model(
-        self, attributes: Sequence[Component]
+        self, attributes: Sequence[Component], measures: Sequence[Component]
     ) -> Optional["JsonAttributes"]:
         """Converts a pysdmx list of attributes to an SDMX-JSON one."""
         if len(attributes) > 0:
             return JsonAttributes(
-                attributes=[
+                attributes=[
+                    JsonAttribute.from_model(a, measures) for a in attributes
+                ]
             )
         else:
             return None
@@ -502,7 +521,9 @@ class JsonComponents(Struct, frozen=True, omit_defaults=True):
     ) -> "JsonComponents":
         """Converts a pysdmx components list to an SDMX-JSON one."""
         dimensions = JsonDimensions.from_model(components.dimensions)
-        attributes = JsonAttributes.from_model(
+        attributes = JsonAttributes.from_model(
+            components.attributes, components.measures
+        )
         measures = JsonMeasures.from_model(components.measures)
         if grps is None:
             groups = []
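The shared idea in both the Fusion and SDMX-JSON readers above: an attribute tied only to the default OBS_VALUE measure is plain observation-level attachment ("O"), while references to explicit measures are serialized as a comma-separated list of measure IDs. A minimal standalone sketch of that derivation (not the pysdmx API):

    from typing import Sequence

    def derive_level(measure_refs: Sequence[str]) -> str:
        # A single reference to the default OBS_VALUE measure is ordinary
        # observation-level attachment; anything else lists the measure IDs.
        if len(measure_refs) == 1 and measure_refs[0] == "OBS_VALUE":
            return "O"
        return ",".join(measure_refs)

    print(derive_level(["OBS_VALUE"]))           # O
    print(derive_level(["TURNOVER", "VOLUME"]))  # TURNOVER,VOLUME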
pysdmx/io/xml/__write_data_aux.py
CHANGED
@@ -26,6 +26,8 @@ def check_dimension_at_observation(
     for key, value in dimension_at_observation.items():
         if key not in datasets:
             raise Invalid(f"Dataset {key} not found in Message content.")
+        if value == ALL_DIM:
+            continue
         writing_validation(datasets[key])
         dataset = datasets[key]
         components = dataset.structure.components  # type: ignore[union-attr]
@@ -42,31 +44,20 @@ def check_dimension_at_observation(
     return dimension_at_observation
 
 
-def writing_validation(dataset: PandasDataset) -> Schema:
-    """Structural validation of the dataset.
-
-    Args:
-        dataset: The dataset to validate.
-
-    Returns:
-        The `Schema` from the dataset.
-
-    Raises:
-        Invalid: If the structure is not a `Schema` or validation fails.
-    """
+def writing_validation(dataset: PandasDataset) -> None:
+    """Structural validation of the dataset."""
     if not isinstance(dataset.structure, Schema):
         raise Invalid(
             "Dataset Structure is not a Schema. Cannot perform operation."
         )
-    schema = dataset.structure
     required_components = [
         comp.id
-        for comp in schema.components
+        for comp in dataset.structure.components
         if comp.role in (Role.DIMENSION, Role.MEASURE)
     ]
     required_components.extend(
         att.id
-        for att in schema.components.attributes
+        for att in dataset.structure.components.attributes
         if (
             att.required
             and att.attachment_level is not None
@@ -75,7 +66,7 @@ def writing_validation(dataset: PandasDataset) -> Schema:
     )
     non_required = [
         comp.id
-        for comp in schema.components
+        for comp in dataset.structure.components
         if comp.id not in required_components
     ]
     # Columns match components
@@ -91,11 +82,9 @@ def writing_validation(dataset: PandasDataset) -> Schema:
         f"Difference: {', '.join(difference)}"
     )
     # Check if the dataset has at least one dimension and one measure
-    if not schema.components.dimensions:
+    if not dataset.structure.components.dimensions:
         raise Invalid(
             "The dataset structure must have at least one dimension."
         )
-    if not schema.components.measures:
+    if not dataset.structure.components.measures:
         raise Invalid("The dataset structure must have at least one measure.")
-
-    return schema
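`writing_validation` now only checks the dataset instead of also returning the Schema. The core of the check is a set comparison between the DataFrame's columns and the structure's component IDs. A self-contained sketch of that comparison, with plain sets standing in for pysdmx's `Components`:

    import pandas as pd

    def check_columns(
        df: pd.DataFrame, required: set[str], optional: set[str]
    ) -> None:
        cols = set(df.columns)
        missing = required - cols             # every dimension/measure must exist
        unknown = cols - required - optional  # no column outside the structure
        if missing or unknown:
            raise ValueError(
                "Columns do not match components. "
                f"Difference: {', '.join(sorted(missing | unknown))}"
            )

    df = pd.DataFrame({"FREQ": ["A"], "REF_AREA": ["CH"], "OBS_VALUE": [1.2]})
    check_columns(df, {"FREQ", "REF_AREA", "OBS_VALUE"}, {"OBS_STATUS"})  # passes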
pysdmx/io/xml/__write_structure_specific_aux.py
CHANGED
@@ -1,11 +1,10 @@
 # mypy: disable-error-code="union-attr"
 """Module for writing SDMX-ML 3.0 Structure Specific auxiliary functions."""
 
-from typing import Any, Dict, Hashable, List
+from typing import Any, Dict, List
 
 import pandas as pd
 
-from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
     ABBR_MSG,
@@ -70,6 +69,9 @@ def __write_data_structure_specific(
     outfile = ""
 
     for i, (short_urn, dataset) in enumerate(datasets.items()):
+        dataset.data = dataset.data.astype(str).replace(
+            {"nan": "", "<NA>": ""}
+        )
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -113,12 +115,8 @@ def __write_data_single_dataset(
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
     sdmx_type = parse_short_urn(structure_urn).id
-
-    # Validate structure before writing
-    schema = writing_validation(dataset)
-
     # Remove nan values from DataFrame
-    dataset.data = _fill_na_values(dataset.data, schema)
+    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -141,6 +139,7 @@ def __write_data_single_dataset(
     if dim == ALL_DIM:
         data += __memory_optimization_writing(dataset, prettyprint)
     else:
+        writing_validation(dataset)
         series_codes, obs_codes, group_codes = get_codes(
             dimension_code=dim,
             structure=dataset.structure,  # type: ignore[arg-type]
@@ -231,85 +230,69 @@ def __obs_processing(data: pd.DataFrame, prettyprint: bool = True) -> str:
     return "".join(iterator)
 
 
-def
-
+def __series_processing(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    prettyprint: bool = True,
 ) -> str:
-
-
-
-
+    def __generate_series_str() -> str:
+        """Generates the series item with its observations."""
+        out_list: List[str] = []
+        data.groupby(by=series_codes)[obs_codes].apply(
+            lambda x: __format_dict_ser(out_list, x)
+        )
 
-
+        return "".join(out_list)
 
-
-
-
+    def __format_dict_ser(
+        output_list: List[str],
+        obs: Any,
+    ) -> Any:
+        """Formats the series as key=value pairs."""
+        # Creating the observation dict,
+        # we always get the first element on Series
+        # as we are grouping by it
+        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
+        output_list.append(__format_ser_str(data_dict["Series"][0]))
+        # We remove the data for series as it is no longer necessary
+        del data_dict["Series"][0]
 
-
+    def __format_ser_str(data_info: Dict[Any, Any]) -> str:
+        """Formats the series as key=value pairs."""
+        child2 = "\t\t" if prettyprint else ""
+        child3 = "\t\t\t" if prettyprint else ""
+        nl = "\n" if prettyprint else ""
 
-
-        out_element += f"{child3}<Obs "
+        out_element = f"{child2}<Series "
 
-        for k, v in
-
+        for k, v in data_info.items():
+            if k != "Obs":
+                out_element += f"{k}={__escape_xml(str(v))!r} "
 
-        out_element += f"
+        out_element += f">{nl}"
+
+        for obs in data_info["Obs"]:
+            out_element += f"{child3}<Obs "
 
-
+            for k, v in obs.items():
+                out_element += f"{k}={__escape_xml(str(v))!r} "
 
-
+            out_element += f"/>{nl}"
 
+        out_element += f"{child2}</Series>{nl}"
 
-
-
-
-
-
-    return {"Series": [{}] if not data.empty else []}
-    return {
+        return out_element
+
+    # Getting each datapoint from data and creating dict
+    data = data.sort_values(series_codes, axis=0)
+    data_dict = {
         "Series": data[series_codes]
         .drop_duplicates()
        .reset_index(drop=True)
        .to_dict(orient="records")
     }
 
+    out = __generate_series_str()
 
-
-    data: pd.DataFrame,
-    series_codes: List[str],
-    obs_codes: List[str],
-    data_dict: Dict[str, List[Dict[Hashable, Any]]],
-    prettyprint: bool = True,
-) -> str:
-    """Process series and their observations into XML string."""
-    out_list: List[str] = []
-
-    def append_series_with_obs(obs: Any) -> str:
-        """Append series with observations to output list."""
-        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
-        result = __format_ser_str(data_dict["Series"][0], prettyprint)
-        out_list.append(result)
-        del data_dict["Series"][0]
-        return result
-
-    if not series_codes:
-        if not data.empty:
-            append_series_with_obs(data[obs_codes])
-        else:
-            data.groupby(by=series_codes)[obs_codes].apply(append_series_with_obs)
-
-    return "".join(out_list)
-
-
-def __series_processing(
-    data: pd.DataFrame,
-    series_codes: List[str],
-    obs_codes: List[str],
-    prettyprint: bool = True,
-) -> str:
-    """Write series to SDMX-ML Structure-Specific format."""
-    data = data.sort_values(series_codes, axis=0)
-    data_dict = __build_series_dict(data, series_codes)
-    return __process_series_observations(
-        data, series_codes, obs_codes, data_dict, prettyprint
-    )
+    return out
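The rewritten `__series_processing` keeps the groupby-apply shape: rows sharing a series key are grouped, and each group's observation records are emitted under one `<Series>` element. A standalone sketch of the pattern (simplified strings, not the writer's exact output):

    import pandas as pd

    df = pd.DataFrame(
        {
            "FREQ": ["A", "A", "M"],
            "TIME_PERIOD": ["2020", "2021", "2020-01"],
            "OBS_VALUE": [1.0, 2.0, 3.0],
        }
    )

    out: list[str] = []
    for freq, obs in df.groupby("FREQ")[["TIME_PERIOD", "OBS_VALUE"]]:
        series = f'<Series FREQ="{freq}">'
        for rec in obs.to_dict(orient="records"):
            series += (
                f'<Obs TIME_PERIOD="{rec["TIME_PERIOD"]}" '
                f'OBS_VALUE="{rec["OBS_VALUE"]}"/>'
            )
        out.append(series + "</Series>")

    print("".join(out))
    # <Series FREQ="A"><Obs .../><Obs .../></Series><Series FREQ="M"><Obs .../></Series>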
pysdmx/io/xml/sdmx21/writer/generic.py
CHANGED
@@ -2,11 +2,10 @@
 """Module for writing SDMX-ML 2.1 Generic data messages."""
 
 from pathlib import Path
-from typing import Any, Dict,
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 import pandas as pd
 
-from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.format import Format
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
@@ -123,6 +122,7 @@ def __write_data_generic(
 
     for short_urn, dataset in datasets.items():
         writing_validation(dataset)
+        dataset.data = dataset.data.fillna("").astype(str)
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -160,8 +160,7 @@ def __write_data_single_dataset(
     outfile = ""
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
-
-    dataset.data = _fill_na_values(dataset.data, schema)
+    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -348,14 +347,9 @@ def __series_processing(
 ) -> str:
     def __generate_series_str() -> str:
         out_list: List[str] = []
-
-
-
-            __format_dict_ser(out_list, data)
-        else:
-            data.groupby(by=group_cols)[data.columns].apply(
-                lambda x: __format_dict_ser(out_list, x)
-            )
+        data.groupby(by=series_codes + series_att_codes)[data.columns].apply(
+            lambda x: __format_dict_ser(out_list, x)
+        )
 
         return "".join(out_list)
 
@@ -365,14 +359,13 @@ def __series_processing(
 ) -> Any:
     obs_data = group_data[obs_codes + obs_att_codes].copy()
     data_dict["Series"][0]["Obs"] = obs_data.to_dict(orient="records")
-
-
-
-
-
-
-
-        )
+    data_dict["Series"][0].update(
+        {
+            k: v
+            for k, v in group_data[series_att_codes].iloc[0].items()
+            if k in series_att_codes
+        }
+    )
     output_list.append(
         __format_ser_str(
             data_info=data_dict["Series"][0],
@@ -387,17 +380,12 @@ def __series_processing(
 
     # Getting each datapoint from data and creating dict
     data = data.sort_values(series_codes, axis=0)
-
-
-
-
-
-
-        "Series": data[series_codes]
-        .drop_duplicates()
-        .reset_index(drop=True)
-        .to_dict(orient="records")
-    }
+    data_dict = {
+        "Series": data[series_codes]
+        .drop_duplicates()
+        .reset_index(drop=True)
+        .to_dict(orient="records")
+    }
 
     out = __generate_series_str()
 
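The generic writer's new dict comprehension relies on series-level attributes being constant within each group, so reading them from the first row (`iloc[0]`) of the grouped frame is sufficient. A short illustration:

    import pandas as pd

    df = pd.DataFrame(
        {"FREQ": ["A", "A"], "UNIT": ["EUR", "EUR"], "OBS_VALUE": [1, 2]}
    )

    # Within one series group, a series-level attribute such as UNIT is
    # constant, so the first row of the group carries the value for the series.
    for freq, grp in df.groupby("FREQ"):
        series_attrs = grp[["UNIT"]].iloc[0].to_dict()
        print(freq, series_attrs)  # A {'UNIT': 'EUR'}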
pysdmx/model/concept.py
CHANGED
@@ -35,8 +35,6 @@ class DataType(str, Enum):
     """True or False."""
     COUNT = "Count"
     """A simple incrementing integer type."""
-    DATA_SET_REFERENCE = "DataSetReference"
-    """Reference to a data set."""
     DATE = "GregorianDay"
     """A ISO 8601 date (e.g. ``2011-06-17``)."""
     DATE_TIME = "DateTime"
@@ -49,24 +47,12 @@ class DataType(str, Enum):
     """A decimal number (8 bytes)."""
     DURATION = "Duration"
     """An ISO 8601 duration."""
-    EXCLUSIVE_VALUE_RANGE = "ExclusiveValueRange"
-    """A range of values excluding boundaries."""
     FLOAT = "Float"
     """A decimal number (4 bytes)."""
-    GEOSPATIAL_INFORMATION = "GeospatialInformation"
-    """Geospatial data format."""
     GREGORIAN_TIME_PERIOD = "GregorianTimePeriod"
     """This is the union of YEAR, YEAR_MONTH, and DATE."""
-    IDENTIFIABLE_REFERENCE = "IdentifiableReference"
-    """Reference to an identifiable object."""
-    INCLUSIVE_VALUE_RANGE = "InclusiveValueRange"
-    """A range of values including boundaries."""
-    INCREMENTAL = "Incremental"
-    """An integer type that increases sequentially."""
     INTEGER = "Integer"
     """A whole number (4 bytes)."""
-    KEY_VALUES = "KeyValues"
-    """Key values reference."""
     LONG = "Long"
     """A whole number (8 bytes)."""
     MONTH = "Month"
@@ -99,8 +85,6 @@ class DataType(str, Enum):
     """A string (as immutable sequence of Unicode code points)."""
     TIME = "Time"
     """An ISO 8601 time (e.g. ``12:50:42``)."""
-    TIMES_RANGE = "TimesRange"
-    """A range of time periods."""
     URI = "URI"
     """A uniform resource identifier, such as a URL."""
     XHTML = "XHTML"
pysdmx/model/dataflow.py
CHANGED
@@ -101,10 +101,11 @@ class Component(
     one of: *D* (for Dataset), *O* (for Observation), any string identifying a
     component ID (FREQ) or comma-separated list of component IDs
     (FREQ,REF_AREA). The latter can be used to identify the dimension, group
-    or series to which the attribute is attached.
-
-
-
+    or series to which the attribute is attached. It can also be used to
+    identify the measure(s) to which the attribute relates, in case multiple
+    measures are defined. The attachment level of a component may vary with the
+    statistical domain, i.e. a component attached to a series in a particular
+    domain may be attached to, say, the dataset in another domain.
 
     The *codes* field indicates the expected (i.e. allowed) set of values a
     component can take within a particular domain. In addition to
@@ -128,7 +129,9 @@ class Component(
         Attributes can be attached at different levels such as
         D (for dataset-level attributes), O (for observation-level
         attributes) or a combination of dimension IDs, separated by
-        commas, for series- and group-level attributes
+        commas, for series- and group-level attributes, as well as for
+        attributes attached to one or more measures, when multiple
+        measures are defined).
     A post_init check makes this attribute mandatory for attributes.
     array_def: Any additional constraints for array types.
     urn: The URN of the component.
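To make the documented levels concrete, here are illustrative `attachment_level` values (hypothetical component IDs, not taken from a real structure):

    # Hypothetical attributes and their attachment levels:
    levels = {
        "DECIMALS": "D",               # dataset level
        "OBS_STATUS": "O",             # observation level
        "UNIT_MULT": "FREQ,REF_AREA",  # series/group level (dimension IDs)
        "COMMENT": "TURNOVER,VOLUME",  # measure relationship (measure IDs)
    }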
{pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 pysdmx/__extras_check.py,sha256=Tmluui2OuJVyJB6a1Jl0PlrRjpsswhtCjAqtRLOSero,2059
-pysdmx/__init__.py,sha256=
+pysdmx/__init__.py,sha256=76qTEXwql_RGDSAgomUh23-P8FW79kx6Pcvc8iKZR8s,68
 pysdmx/api/__init__.py,sha256=8lRaF6kEO51ehl0fmW_pHLvkN_34TtEhqhr3oKo6E6g,26
 pysdmx/api/dc/__init__.py,sha256=oPU32X8CRZy4T1to9mO5KMqMwxQsVI424dPqai-I8zI,121
 pysdmx/api/dc/_api.py,sha256=poy1FYFXnF6maBGy5lpOodf32-7QQjH8PCBNDkuOXxQ,7747
@@ -10,12 +10,12 @@ pysdmx/api/dc/query/_parsing_util.py,sha256=pUc5z6sijGmJZsLilAxiPsCSRIO7l2iznzL3
 pysdmx/api/dc/query/_py_parser.py,sha256=_kVUk6Xu5jZdclng1F6eDSZS2-ok_yncI1y1q5lYpBU,1502
 pysdmx/api/dc/query/_sql_parser.py,sha256=vQjhSyt6qA4jAHchkq4XXVhNPtKjKSVLzhoPkUyhJKk,1561
 pysdmx/api/dc/query/util.py,sha256=9HALmvlgVCckaMTVG7sEFbAw_mBwfbL4K-Pac2KPSYw,915
-pysdmx/api/fmr/__init__.py,sha256
+pysdmx/api/fmr/__init__.py,sha256=uXD2dOY2lJSY6CoracIh8c1_7wWiJcsQ5ebIEIbHumk,50532
 pysdmx/api/fmr/maintenance.py,sha256=AnR1beyL6nsoDM5LmDLXnRMW5JvhGHXTADf_INSNgUg,5920
 pysdmx/api/gds/__init__.py,sha256=BBl75wEdcz9zPMfk6kAGHitRW39S774VL9xifMZ_uHs,11560
 pysdmx/api/qb/__init__.py,sha256=vxdMJFFg4J_KWADrnzj_8KcU0UlwpJPdx0yiW3QJo9Y,1498
 pysdmx/api/qb/availability.py,sha256=2yPHnTXu_jACNKNhhtXMkxVkfLK1Ewa5ucGAbRxvC5o,10181
-pysdmx/api/qb/data.py,sha256=
+pysdmx/api/qb/data.py,sha256=hQayPQnN_ebBgIoC1xDei2uTbyv0UsoM4qyZyGaNz78,22143
 pysdmx/api/qb/gds.py,sha256=Z2KhP6m09_oWI2CbYRhlTsx8VLC-_UZaQEOEqX94SOw,4975
 pysdmx/api/qb/refmeta.py,sha256=--h0QOvaLGILT_6GBAZ2Ld5aqAELEW-PTsHNcj7YoG8,10677
 pysdmx/api/qb/registration.py,sha256=IURlmXcXQi8e-w5YXCgRNs07EQJZJ2bOdZb7M_k5iZ8,7132
@@ -25,19 +25,18 @@ pysdmx/api/qb/structure.py,sha256=0m_Fmp410Rfjdv0ehLS8ivwccwn-xfBkcIdYayu8pgg,17
 pysdmx/api/qb/util.py,sha256=at2Sb2kVltSTDc1gKiqG6HtIFhjSx-Msbe--wCvRbQI,3667
 pysdmx/errors.py,sha256=9bfujYykzfGMa1TuUOmH9QqghnZGOo556fvbKH2jFa8,3295
 pysdmx/io/__init__.py,sha256=96ZCY1PfcWp_q2Nlo2tHJAK31sH_b05v9UkbR0vGdg0,180
-pysdmx/io/_pd_utils.py,sha256=NgGhjn9clB0xD_3hsOzwXci8ix9dLe2Lt2DAZ9Tkyzw,2555
 pysdmx/io/csv/__csv_aux_reader.py,sha256=2RGzhga_VDnh2OVX_Bo-rR2hgAQxHXa7zt7-D5MVBu0,3994
-pysdmx/io/csv/__csv_aux_writer.py,sha256=
+pysdmx/io/csv/__csv_aux_writer.py,sha256=lCPl6hpyp12xhXlyimRlxxOT33RcgXDGTQZb8JguksI,4118
 pysdmx/io/csv/__init__.py,sha256=53f2rPkwILigOqArgRQOOwLk-ha6zVTe4EypIsR7K6I,107
 pysdmx/io/csv/sdmx10/__init__.py,sha256=NAAm_yodK-gzkuzewGQeYpF3f5nZmDA4vWGfT2KGTWc,38
-pysdmx/io/csv/sdmx10/reader/__init__.py,sha256=
-pysdmx/io/csv/sdmx10/writer/__init__.py,sha256=
+pysdmx/io/csv/sdmx10/reader/__init__.py,sha256=uGc-sv4YXHteVQZPTdkVUkVZ6iKY7h7Fg56dw7VZ2UU,2735
+pysdmx/io/csv/sdmx10/writer/__init__.py,sha256=d-kLcP711k1nmG_D4whDxqWCzODRT7HTqk95N-jXBK8,2923
 pysdmx/io/csv/sdmx20/__init__.py,sha256=6_YCb4iuUWJRS9y0KSdf4ebNKblSlnTTzNC5c19kNk8,38
-pysdmx/io/csv/sdmx20/reader/__init__.py,sha256=
-pysdmx/io/csv/sdmx20/writer/__init__.py,sha256=
+pysdmx/io/csv/sdmx20/reader/__init__.py,sha256=PmVXd8QXvXej6XSDAPsIc8VptLk69NK37-wunHH7Pvc,2846
+pysdmx/io/csv/sdmx20/writer/__init__.py,sha256=puksYRzcog3wv9JGWA--6rvv9aRAn86Vsv1CyA7Em-c,2489
 pysdmx/io/csv/sdmx21/__init__.py,sha256=I3_dwi4A4if62_mwEjqbOa-F7mhoIMf0D6szpDf3W7c,38
-pysdmx/io/csv/sdmx21/reader/__init__.py,sha256=
-pysdmx/io/csv/sdmx21/writer/__init__.py,sha256=
+pysdmx/io/csv/sdmx21/reader/__init__.py,sha256=J1cCkZh3klgZZWjdQ_U1zkfzT_DVzQmdreGZhN33SLs,2866
+pysdmx/io/csv/sdmx21/writer/__init__.py,sha256=CH8Nm7hqvXyN6XM_D2nJRmbKj6CJV-X1QzSF0WJrs0E,2484
 pysdmx/io/format.py,sha256=EO-PyYpiU0WswvEGA5UHokladxPezcwBUo1AJTqxp1Q,5250
 pysdmx/io/input_processor.py,sha256=P1_jKegrOyV7EaZLjLrq8fX2u1EI7gPBJoKvlBCNkP0,6967
 pysdmx/io/json/fusion/messages/__init__.py,sha256=ac2jWfjGGBcfoSutiKy68LzqwNp_clt2RzmJOaYCxL0,2142
@@ -47,7 +46,7 @@ pysdmx/io/json/fusion/messages/concept.py,sha256=m4lTyncSIriFXWWupE-zLxUVPx3Xrg5
 pysdmx/io/json/fusion/messages/constraint.py,sha256=dPkzhCWN49Y9ReSZPRFTdM6GWc0rU2BZTyFfWsqlX34,615
 pysdmx/io/json/fusion/messages/core.py,sha256=GdzF3TNUGrB0gxuaaSpk9LaYqcdy_M6L2azExZQfM0Q,4843
 pysdmx/io/json/fusion/messages/dataflow.py,sha256=lsaMPjmA-KiM51I78wrONfNHyvfBSeAll5Sp0jmTezc,2972
-pysdmx/io/json/fusion/messages/dsd.py,sha256=
+pysdmx/io/json/fusion/messages/dsd.py,sha256=hnVOY0GZSuYQFCZd-hcjGd_YEacUue_9S4YjpSJJigM,10341
 pysdmx/io/json/fusion/messages/map.py,sha256=TPsCFuUfk5Jhhe7CNvEoHuFNZFpHhvNiYFWeIEUx-sc,7695
 pysdmx/io/json/fusion/messages/metadataflow.py,sha256=Js4j8lUF9ZwqL7lJUrfrjk9tmBmRQPt8qxdrfnZ6R5E,1374
 pysdmx/io/json/fusion/messages/mpa.py,sha256=WvcHn7Pa_UBHxkZbfSzIxc8qeeMfTWThxgCRHuioXFY,1494
@@ -73,7 +72,7 @@ pysdmx/io/json/sdmxjson2/messages/concept.py,sha256=x7BoG6AaziZGNjxeypwy_lsFTmdH
 pysdmx/io/json/sdmxjson2/messages/constraint.py,sha256=TCg-Z6ZkZSzlhjvaZebKk1wL_CPhmJzyKjEkE4FPkAc,9055
 pysdmx/io/json/sdmxjson2/messages/core.py,sha256=qF0fscWY1yRxmN-4s2UweEWqbDMSioaUPaxlYEo8ouY,10137
 pysdmx/io/json/sdmxjson2/messages/dataflow.py,sha256=wjeq9yexTa012AtGdZsZflp3WQ6fP-3kas-gxADTFeQ,6256
-pysdmx/io/json/sdmxjson2/messages/dsd.py,sha256=
+pysdmx/io/json/sdmxjson2/messages/dsd.py,sha256=ZLFISJYtpP4cPQwcIjMkEGVPBE4Yy6cCKvwGw0xjFh4,20431
 pysdmx/io/json/sdmxjson2/messages/map.py,sha256=ZB7XPX6nUcu2MMHAsgwAR0nxlbEQF7YasplhlS5K9EI,16774
 pysdmx/io/json/sdmxjson2/messages/metadataflow.py,sha256=1hKCyzTEAvB_MOgmtjBObC9RVNSge7Sick6nQMwl17o,2994
 pysdmx/io/json/sdmxjson2/messages/mpa.py,sha256=ryoQCvOvd2j6gPdGOEML4bc-NXUSetuKNOfmd9Ogn2s,3050
@@ -85,7 +84,7 @@ pysdmx/io/json/sdmxjson2/messages/schema.py,sha256=JwFYjgvhK_1NN5KQIUYNb0ul4ywQh
 pysdmx/io/json/sdmxjson2/messages/structure.py,sha256=c0dyTJK49UpGvL1iLFaFLYFkT89kzvXwk65qd_j-Y1U,12738
 pysdmx/io/json/sdmxjson2/messages/vtl.py,sha256=C-JQY1_W8SrJd2lLdUGCmQO9Br1pdqdT8WmB1K4e_yY,35284
 pysdmx/io/json/sdmxjson2/reader/__init__.py,sha256=RbNnZSrGQa4OE0HBWJau9tPFSQbDklcKZaBWOzxEw4I,1629
-pysdmx/io/json/sdmxjson2/reader/doc_validation.py,sha256=
+pysdmx/io/json/sdmxjson2/reader/doc_validation.py,sha256=Li0N5UmTKJy_FWT5vfwt7DEtp8xiYaXccDRBna_Q0Jw,3822
 pysdmx/io/json/sdmxjson2/reader/metadata.py,sha256=FT9CEWjrVfUK4vTEqs-f2ZO6jWeRRkEHCjsHMNgKQp0,1230
 pysdmx/io/json/sdmxjson2/reader/structure.py,sha256=PKHvH_lY2XJtKkg5rGbGSHyYu_raGLrBuaEy8BKr6U0,1209
 pysdmx/io/json/sdmxjson2/writer/__init__.py,sha256=DZGkas1ghei4p6SZsIQI1LPToS-d8F1Nx75MC8reT7g,270
@@ -104,8 +103,8 @@ pysdmx/io/xml/__structure_aux_reader.py,sha256=50UPOCk2XnCU4J1hQNAXiGL2n8QPXdf4z
 pysdmx/io/xml/__structure_aux_writer.py,sha256=0i08hvFw2TfRElaGAeTwfsOaOpw8XWBlZ_zWdxmLPkM,45612
 pysdmx/io/xml/__tokens.py,sha256=M0x-tgoh6_pzL_BP-MArCu3w0NO-AUS6bR-W6BIEJG8,6891
 pysdmx/io/xml/__write_aux.py,sha256=c3HgDMey8nBXyeT_yU8PWdk25bgYyX49R21fLv8CgZc,15534
-pysdmx/io/xml/__write_data_aux.py,sha256=
-pysdmx/io/xml/__write_structure_specific_aux.py,sha256=
+pysdmx/io/xml/__write_data_aux.py,sha256=mnJpooA4koqGbKhF-6eonT3drQ-qCuwwP-lfZLHKqVQ,3403
+pysdmx/io/xml/__write_structure_specific_aux.py,sha256=reRDVw4Xwag0ODyMzm9uOk9WJ_e1ELxAPYHSMUUDJBQ,8919
 pysdmx/io/xml/config.py,sha256=R24cczVkzkhjVLXpv-qfEm88W3_QTqVt2Qofi8IvJ5Y,93
 pysdmx/io/xml/doc_validation.py,sha256=WXDhte96VEAeZMMHJ0Y68WW8HEoOhEiOYEnbGP5Zwjw,1795
 pysdmx/io/xml/header.py,sha256=My03uhWD3AkfTwfUqiblmLIZuqd7uvIEYsOial6TClg,5971
@@ -118,7 +117,7 @@ pysdmx/io/xml/sdmx21/reader/structure_specific.py,sha256=S3-gLmaBFjBRIr25qQtlrao
 pysdmx/io/xml/sdmx21/reader/submission.py,sha256=8daiBW-sIVGaB6lYwHqJNkLI7IixMSydCK-0ZO8ri4I,1711
 pysdmx/io/xml/sdmx21/writer/__init__.py,sha256=QQGFAss26njCC4eKYxhBcI9LYm5NHuJaAJGKCrIrL80,31
 pysdmx/io/xml/sdmx21/writer/error.py,sha256=0wkX7K_n2oZNkOKg_zpl9Id82qP72Lqof-T-ZLGoZ1M,353
-pysdmx/io/xml/sdmx21/writer/generic.py,sha256=
+pysdmx/io/xml/sdmx21/writer/generic.py,sha256=8_kUMMUiIFVdKMyhpR2LKDyfiinBohO_aL_6GZpOInY,15786
 pysdmx/io/xml/sdmx21/writer/structure.py,sha256=S3qoNgXxrakn2V4NLdL5U5mAA16XisI0PpJDuxqalFE,2084
 pysdmx/io/xml/sdmx21/writer/structure_specific.py,sha256=iXc1J-RzoKyRznvgGgdTSeUfyqZLouI8CtSq2YhGBWI,2877
 pysdmx/io/xml/sdmx30/__init__.py,sha256=8BScJFEgLy8DoUreu2RBUtxjGjKyClkKBI_Qtarbk-Y,38
@@ -140,9 +139,9 @@ pysdmx/model/__base.py,sha256=M1O-uT8RqeKZpGT09HD6ifjPl0F9ORxlRYra3fn8qCM,15182
 pysdmx/model/__init__.py,sha256=UPZtum_PF-nPPQa21Bq1doUXLCoU-yRGPh45ZXdUI9k,5493
 pysdmx/model/category.py,sha256=ksYIOGPHgZI619RhmRXZUXHP_juY9im40tWzR2yuMEc,6796
 pysdmx/model/code.py,sha256=Wu6rEXeZf_XA0aBrDXgN-3yvySAHH7SAjrWliFlmC24,12799
-pysdmx/model/concept.py,sha256=
+pysdmx/model/concept.py,sha256=mQfqJdtWc10WdTKX_Mw7Znw65cN3QO-kCar9MWYeWO4,9645
 pysdmx/model/constraint.py,sha256=MwI_GLKzwkuo0BzAsgcnDeB-b7bq8qqwHNte5JjCEFA,1960
-pysdmx/model/dataflow.py,sha256=
+pysdmx/model/dataflow.py,sha256=IyVBWIKSkl6Qm5R2a6n_uPveUPsIpaYUMc_4hcwJ4Uw,24196
 pysdmx/model/dataset.py,sha256=Lbr7tYonGHD3NZUD-M9hK2puaEAluOVPG2DbkOohzMM,4861
 pysdmx/model/gds.py,sha256=QrnmI8Hn--C95gGXCeUeWwhn-Ur7DuT08Cg7oPJIEVI,4976
 pysdmx/model/map.py,sha256=9a3hl6efq_5kAYuJWkepoQOkao8Eqk17N69JGyRfxsk,17506
@@ -164,7 +163,7 @@ pysdmx/util/__init__.py,sha256=m_XWRAmVJ7F6ai4Ckrj_YuPbhg3cJZAXeZrEThrL88k,3997
 pysdmx/util/_date_pattern_map.py,sha256=IS1qONwVHbTBNIFCT0Rqbijj2a9mYvs7onXSK6GeQAQ,1620
 pysdmx/util/_model_utils.py,sha256=nQ1yWBt-tZYDios9xvRvJ7tHq6A8_RoGdY1wi7WGz2w,3793
 pysdmx/util/_net_utils.py,sha256=nOTz_x3FgFrwKh42_J70IqYXz9duQkMFJWtejZXcLHs,1326
-pysdmx-1.10.
-pysdmx-1.10.
-pysdmx-1.10.
-pysdmx-1.10.
+pysdmx-1.10.1.dist-info/METADATA,sha256=tXCxkMOnWoEJWTBRH4ZvzO1k20lm9cxDIneSSfQ8pzE,4849
+pysdmx-1.10.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+pysdmx-1.10.1.dist-info/licenses/LICENSE,sha256=3XTNDPtv2RxDUNkQzn9MIWit2u7_Ob5daMLEq-4pBJs,649
+pysdmx-1.10.1.dist-info/RECORD,,
pysdmx/io/_pd_utils.py
DELETED
@@ -1,83 +0,0 @@
-import pandas as pd
-
-from pysdmx.errors import Invalid
-from pysdmx.model.concept import DataType
-from pysdmx.model.dataflow import Schema
-
-NUMERIC_TYPES = {
-    DataType.BIG_INTEGER,
-    DataType.COUNT,
-    DataType.DECIMAL,
-    DataType.DOUBLE,
-    DataType.FLOAT,
-    DataType.INCREMENTAL,
-    DataType.INTEGER,
-    DataType.LONG,
-    DataType.SHORT,
-}
-
-
-def _fill_na_values(data: pd.DataFrame, structure: Schema) -> pd.DataFrame:
-    """Fills missing values in the DataFrame based on the component type.
-
-    Numeric components are filled with "NaN".
-    Other components are filled with "#N/A".
-    If the structure does not have components,
-    all missing values are filled with "".
-
-    Args:
-        data: The DataFrame to fill.
-        structure: The structure definition (´Schema´).
-
-    Returns:
-        The DataFrame with filled missing values.
-
-    Raises:
-        Invalid: If the structure does not have components.
-    """
-    for component in structure.components:
-        if component.id in data.columns:
-            if component.dtype in NUMERIC_TYPES:
-                data[component.id] = (
-                    data[component.id].astype(object).fillna("NaN")
-                )
-            else:
-                data[component.id] = (
-                    data[component.id].astype(object).fillna("#N/A")
-                )
-
-    return data
-
-
-def _validate_explicit_null_values(
-    data: pd.DataFrame, structure: Schema
-) -> None:
-    """Validates that explicit null values are correct for the component type.
-
-    Numeric components must not contain "#N/A".
-    Non-numeric components must not contain "NaN".
-
-    Args:
-        data: The DataFrame to validate.
-        structure: The structure definition (´Schema´).
-
-    Raises:
-        Invalid: If invalid null values are found.
-    """
-    for component in structure.components:
-        if component.id in data.columns:
-            series = data[component.id].astype(str)
-            if component.dtype in NUMERIC_TYPES:
-                # Numeric: #N/A is invalid
-                if series.isin(["#N/A"]).any():
-                    raise Invalid(
-                        f"Invalid null value '#N/A' in numeric component "
-                        f"'{component.id}'."
-                    )
-            else:
-                # Non-numeric: NaN is invalid
-                if series.isin(["NaN"]).any():
-                    raise Invalid(
-                        f"Invalid null value 'NaN' in non-numeric component "
-                        f"'{component.id}'."
-                    )
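The net behavioral change across the writers, as far as this diff shows: the deleted `_fill_na_values` wrote type-dependent sentinels ("NaN" for numeric components, "#N/A" for everything else), while 1.10.1 renders every missing value as an empty string. A standalone before/after sketch:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"OBS_VALUE": [1.0, np.nan], "OBS_STATUS": ["A", np.nan]})

    # 1.10.0rc2 behavior (per the deleted _fill_na_values): type-based sentinels.
    old = df.copy()
    old["OBS_VALUE"] = old["OBS_VALUE"].astype(object).fillna("NaN")
    old["OBS_STATUS"] = old["OBS_STATUS"].astype(object).fillna("#N/A")
    print(old.loc[1].tolist())  # ['NaN', '#N/A']

    # 1.10.1 behavior: every null becomes an empty string.
    new = df.astype(str).replace({"nan": "", "<NA>": ""})
    print(new.loc[1].tolist())  # ['', '']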
{pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/WHEEL
File without changes

{pysdmx-1.10.0rc2.dist-info → pysdmx-1.10.1.dist-info}/licenses/LICENSE
File without changes