pysdmx 1.10.1__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysdmx/__init__.py +1 -1
- pysdmx/api/fmr/maintenance.py +10 -5
- pysdmx/io/input_processor.py +4 -0
- pysdmx/io/json/fusion/messages/constraint.py +22 -1
- pysdmx/io/json/fusion/messages/dsd.py +20 -14
- pysdmx/io/json/fusion/messages/msd.py +6 -9
- pysdmx/io/json/fusion/messages/schema.py +20 -1
- pysdmx/io/json/sdmxjson2/messages/core.py +12 -5
- pysdmx/io/json/sdmxjson2/messages/dsd.py +11 -17
- pysdmx/io/json/sdmxjson2/messages/msd.py +2 -5
- pysdmx/io/json/sdmxjson2/messages/report.py +7 -3
- pysdmx/io/json/sdmxjson2/messages/schema.py +38 -5
- pysdmx/io/json/sdmxjson2/messages/structure.py +7 -3
- pysdmx/io/json/sdmxjson2/reader/metadata.py +3 -3
- pysdmx/io/json/sdmxjson2/reader/structure.py +3 -3
- pysdmx/io/json/sdmxjson2/writer/_helper.py +118 -0
- pysdmx/io/json/sdmxjson2/writer/v2_0/__init__.py +1 -0
- pysdmx/io/json/sdmxjson2/writer/v2_0/metadata.py +33 -0
- pysdmx/io/json/sdmxjson2/writer/v2_0/structure.py +33 -0
- pysdmx/io/json/sdmxjson2/writer/v2_1/__init__.py +1 -0
- pysdmx/io/json/sdmxjson2/writer/v2_1/metadata.py +31 -0
- pysdmx/io/json/sdmxjson2/writer/v2_1/structure.py +33 -0
- pysdmx/io/reader.py +12 -3
- pysdmx/io/writer.py +13 -3
- pysdmx/io/xml/__ss_aux_reader.py +39 -17
- pysdmx/io/xml/__structure_aux_reader.py +221 -33
- pysdmx/io/xml/__structure_aux_writer.py +304 -5
- pysdmx/io/xml/__tokens.py +12 -0
- pysdmx/io/xml/__write_aux.py +9 -0
- pysdmx/io/xml/sdmx21/writer/generic.py +2 -2
- pysdmx/model/dataflow.py +11 -2
- pysdmx/toolkit/pd/_data_utils.py +1 -1
- {pysdmx-1.10.1.dist-info → pysdmx-1.12.0.dist-info}/METADATA +7 -1
- {pysdmx-1.10.1.dist-info → pysdmx-1.12.0.dist-info}/RECORD +36 -31
- {pysdmx-1.10.1.dist-info → pysdmx-1.12.0.dist-info}/WHEEL +1 -1
- pysdmx/io/json/sdmxjson2/writer/metadata.py +0 -60
- pysdmx/io/json/sdmxjson2/writer/structure.py +0 -61
- {pysdmx-1.10.1.dist-info → pysdmx-1.12.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Shared helpers for writing SDMX-JSON 2.x messages."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Literal, Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
import msgspec
|
|
7
|
+
|
|
8
|
+
from pysdmx.io.json.sdmxjson2.messages import (
|
|
9
|
+
JsonMetadataMessage,
|
|
10
|
+
JsonStructureMessage,
|
|
11
|
+
)
|
|
12
|
+
from pysdmx.model import MetadataReport, encoders
|
|
13
|
+
from pysdmx.model.__base import MaintainableArtefact
|
|
14
|
+
from pysdmx.model.message import Header, MetadataMessage, StructureMessage
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _emit_json(
    message: object,
    output_path: Optional[Union[str, Path]],
    prettyprint: bool,
) -> Optional[str]:
    """Encode an SDMX-JSON message, then save it or hand it back as text.

    Shared tail of ``write_metadata_msg`` and ``write_structure_msg``.

    Args:
        message: The SDMX-JSON message object to be encoded.
        output_path: The path to save the JSON file. If None or empty, the
            serialized content is returned as a string instead.
        prettyprint: Whether to indent the JSON output with 4 spaces (True)
            or keep the compact encoder output (False).

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    payload = msgspec.json.Encoder(enc_hook=encoders).encode(message)
    if prettyprint:
        payload = msgspec.json.format(payload, indent=4)

    # No destination: the caller wants the document itself.
    if not output_path:
        return payload.decode("utf-8")

    # Path() accepts both strings and existing Path objects.
    target = Path(output_path)
    # Create parent directories if they don't exist.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(payload)
    return None


def write_metadata_msg(
    reports: Sequence[MetadataReport],
    output_path: Optional[Union[str, Path]] = None,
    prettyprint: bool = True,
    header: Optional[Header] = None,
    msg_version: Literal["2.0.0", "2.1"] = "2.0.0",
) -> Optional[str]:
    """Write metadata reports in the requested SDMX-JSON version.

    Args:
        reports: The reference metadata reports to be serialized.
        output_path: The path to save the JSON file. If None or empty, the
            serialized content is returned as a string instead.
        prettyprint: Whether to format the JSON output with indentation (True)
            or output compact JSON without extra whitespace (False).
        header: The header to be used in the SDMX-JSON message
            (a default ``Header()`` is created if none is supplied).
        msg_version: The desired version of SDMX-JSON. Defaults to 2.0.0.

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    message = MetadataMessage(header or Header(), reports)
    json_message = JsonMetadataMessage.from_model(message, msg_version)
    return _emit_json(json_message, output_path, prettyprint)


def write_structure_msg(
    structures: Sequence[MaintainableArtefact],
    output_path: Optional[Union[str, Path]] = None,
    prettyprint: bool = True,
    header: Optional[Header] = None,
    msg_version: Literal["2.0.0", "2.1"] = "2.0.0",
) -> Optional[str]:
    """Write maintainable SDMX artefacts in the requested SDMX-JSON version.

    Args:
        structures: The maintainable SDMX artefacts to be serialized.
        output_path: The path to save the JSON file. If None or empty, the
            serialized content is returned as a string instead.
        prettyprint: Whether to format the JSON output with indentation (True)
            or output compact JSON without extra whitespace (False).
        header: The header to be used in the SDMX-JSON message
            (a default ``Header()`` is created if none is supplied).
        msg_version: The desired version of SDMX-JSON. Defaults to 2.0.0.

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    message = StructureMessage(header or Header(), structures)
    json_message = JsonStructureMessage.from_model(message, msg_version)
    return _emit_json(json_message, output_path, prettyprint)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Collection of writers for SDMX-JSON 2.0.0 messages."""
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Writer interface for SDMX-JSON 2.0.0 Reference Metadata messages."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
from pysdmx.io.json.sdmxjson2.writer._helper import write_metadata_msg
|
|
7
|
+
from pysdmx.model import MetadataReport
|
|
8
|
+
from pysdmx.model.message import Header
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write(
    reports: Sequence[MetadataReport],
    output_path: Optional[Union[str, Path]] = None,
    prettyprint: bool = True,
    header: Optional[Header] = None,
) -> Optional[str]:
    """Serialize reference metadata reports as SDMX-JSON 2.0.0.

    Delegates to ``write_metadata_msg`` with the message version pinned
    to "2.0.0".

    Args:
        reports: The reference metadata reports to be serialized.
        output_path: Destination of the JSON file. When None or empty,
            nothing is written and the JSON text is returned instead.
        prettyprint: True for indented JSON, False for compact JSON.
        header: The header to be used in the SDMX-JSON message
            (one is generated if no header is supplied).

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    return write_metadata_msg(
        reports=reports,
        output_path=output_path,
        prettyprint=prettyprint,
        header=header,
        msg_version="2.0.0",
    )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Writer interface for SDMX-JSON 2.0.0 Structure messages."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
from pysdmx.io.json.sdmxjson2.writer._helper import write_structure_msg
|
|
7
|
+
from pysdmx.model.__base import MaintainableArtefact
|
|
8
|
+
from pysdmx.model.message import Header
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write(
    structures: Sequence[MaintainableArtefact],
    output_path: Optional[Union[str, Path]] = None,
    prettyprint: bool = True,
    header: Optional[Header] = None,
) -> Optional[str]:
    """Serialize maintainable SDMX artefacts as SDMX-JSON 2.0.0.

    Delegates to ``write_structure_msg`` with the message version pinned
    to "2.0.0".

    Args:
        structures: The maintainable SDMX artefacts to be serialized.
        output_path: Destination of the JSON file. When None or empty,
            nothing is written and the JSON text is returned instead.
        prettyprint: True for indented JSON, False for compact JSON.
        header: The header to be used in the SDMX-JSON message
            (one is generated if no header is supplied).

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    return write_structure_msg(
        structures=structures,
        output_path=output_path,
        prettyprint=prettyprint,
        header=header,
        msg_version="2.0.0",
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Collection of writers for SDMX-JSON 2.1.0 messages."""
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Writer interface for SDMX-JSON 2.1.0 Reference Metadata messages."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
from pysdmx.io.json.sdmxjson2.writer._helper import write_metadata_msg
|
|
7
|
+
from pysdmx.model import MetadataReport
|
|
8
|
+
from pysdmx.model.message import Header
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write(
    reports: Sequence[MetadataReport],
    output_path: Optional[Union[str, Path]] = None,
    prettyprint: bool = True,
    header: Optional[Header] = None,
) -> Optional[str]:
    """Serialize reference metadata reports as SDMX-JSON 2.1.0.

    Delegates to ``write_metadata_msg`` with the message version pinned
    to "2.1".

    Args:
        reports: The reference metadata reports to be serialized.
        output_path: Destination of the JSON file. When None or empty,
            nothing is written and the JSON text is returned instead.
        prettyprint: True for indented JSON, False for compact JSON.
        header: The header to be used in the SDMX-JSON message
            (one is generated if no header is supplied).

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    return write_metadata_msg(
        reports=reports,
        output_path=output_path,
        prettyprint=prettyprint,
        header=header,
        msg_version="2.1",
    )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Writer interface for SDMX-JSON 2.1.0 Structure messages."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
from pysdmx.io.json.sdmxjson2.writer._helper import write_structure_msg
|
|
7
|
+
from pysdmx.model.__base import MaintainableArtefact
|
|
8
|
+
from pysdmx.model.message import Header
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write(
    structures: Sequence[MaintainableArtefact],
    output_path: Optional[Union[str, Path]] = None,
    prettyprint: bool = True,
    header: Optional[Header] = None,
) -> Optional[str]:
    """Serialize maintainable SDMX artefacts as SDMX-JSON 2.1.0.

    Delegates to ``write_structure_msg`` with the message version pinned
    to "2.1".

    Args:
        structures: The maintainable SDMX artefacts to be serialized.
        output_path: Destination of the JSON file. When None or empty,
            nothing is written and the JSON text is returned instead.
        prettyprint: True for indented JSON, False for compact JSON.
        header: The header to be used in the SDMX-JSON message
            (one is generated if no header is supplied).

    Returns:
        The JSON string if output_path is None or empty, None otherwise.
    """
    return write_structure_msg(
        structures=structures,
        output_path=output_path,
        prettyprint=prettyprint,
        header=header,
        msg_version="2.1",
    )
|
pysdmx/io/reader.py
CHANGED
|
@@ -80,7 +80,10 @@ def read_sdmx( # noqa: C901
|
|
|
80
80
|
header = read_header(input_str, validate=validate)
|
|
81
81
|
# SDMX-ML 3.1 Structure
|
|
82
82
|
result_structures = read_structure(input_str, validate=validate)
|
|
83
|
-
elif read_format
|
|
83
|
+
elif read_format in (
|
|
84
|
+
Format.STRUCTURE_SDMX_JSON_2_0_0,
|
|
85
|
+
Format.STRUCTURE_SDMX_JSON_2_1_0,
|
|
86
|
+
):
|
|
84
87
|
from pysdmx.io.json.sdmxjson2.reader.structure import (
|
|
85
88
|
read as read_struct,
|
|
86
89
|
)
|
|
@@ -90,7 +93,10 @@ def read_sdmx( # noqa: C901
|
|
|
90
93
|
result_structures = (
|
|
91
94
|
struct_msg.structures if struct_msg.structures else []
|
|
92
95
|
)
|
|
93
|
-
elif read_format
|
|
96
|
+
elif read_format in (
|
|
97
|
+
Format.REFMETA_SDMX_JSON_2_0_0,
|
|
98
|
+
Format.REFMETA_SDMX_JSON_2_1_0,
|
|
99
|
+
):
|
|
94
100
|
from pysdmx.io.json.sdmxjson2.reader.metadata import (
|
|
95
101
|
read as read_refmeta,
|
|
96
102
|
)
|
|
@@ -174,7 +180,10 @@ def read_sdmx( # noqa: C901
|
|
|
174
180
|
return Message(header=header, data=result_data)
|
|
175
181
|
elif read_format == Format.REGISTRY_SDMX_ML_2_1:
|
|
176
182
|
return Message(header=header, submission=result_submission)
|
|
177
|
-
elif read_format
|
|
183
|
+
elif read_format in (
|
|
184
|
+
Format.REFMETA_SDMX_JSON_2_0_0,
|
|
185
|
+
Format.REFMETA_SDMX_JSON_2_1_0,
|
|
186
|
+
):
|
|
178
187
|
return Message(header=header, reports=reports)
|
|
179
188
|
# TODO: Ensure we have changed the signature of the structure readers
|
|
180
189
|
return Message(header=header, structures=result_structures)
|
pysdmx/io/writer.py
CHANGED
|
@@ -24,10 +24,16 @@ WRITERS = {
|
|
|
24
24
|
Format.DATA_SDMX_ML_3_1: "pysdmx.io.xml.sdmx31.writer.structure_specific",
|
|
25
25
|
Format.STRUCTURE_SDMX_ML_3_1: "pysdmx.io.xml.sdmx31.writer.structure",
|
|
26
26
|
Format.STRUCTURE_SDMX_JSON_2_0_0: (
|
|
27
|
-
"pysdmx.io.json.sdmxjson2.writer.structure"
|
|
27
|
+
"pysdmx.io.json.sdmxjson2.writer.v2_0.structure"
|
|
28
|
+
),
|
|
29
|
+
Format.STRUCTURE_SDMX_JSON_2_1_0: (
|
|
30
|
+
"pysdmx.io.json.sdmxjson2.writer.v2_1.structure"
|
|
28
31
|
),
|
|
29
32
|
Format.REFMETA_SDMX_JSON_2_0_0: (
|
|
30
|
-
"pysdmx.io.json.sdmxjson2.writer.metadata"
|
|
33
|
+
"pysdmx.io.json.sdmxjson2.writer.v2_0.metadata"
|
|
34
|
+
),
|
|
35
|
+
Format.REFMETA_SDMX_JSON_2_1_0: (
|
|
36
|
+
"pysdmx.io.json.sdmxjson2.writer.v2_1.metadata"
|
|
31
37
|
),
|
|
32
38
|
}
|
|
33
39
|
|
|
@@ -36,9 +42,13 @@ STRUCTURE_WRITERS = (
|
|
|
36
42
|
Format.STRUCTURE_SDMX_ML_3_0,
|
|
37
43
|
Format.STRUCTURE_SDMX_ML_3_1,
|
|
38
44
|
Format.STRUCTURE_SDMX_JSON_2_0_0,
|
|
45
|
+
Format.STRUCTURE_SDMX_JSON_2_1_0,
|
|
39
46
|
)
|
|
40
47
|
|
|
41
|
-
REFMETA_WRITERS = (
|
|
48
|
+
REFMETA_WRITERS = (
|
|
49
|
+
Format.REFMETA_SDMX_JSON_2_0_0,
|
|
50
|
+
Format.REFMETA_SDMX_JSON_2_1_0,
|
|
51
|
+
)
|
|
42
52
|
|
|
43
53
|
|
|
44
54
|
def write_sdmx(
|
pysdmx/io/xml/__ss_aux_reader.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""SDMX XML StructureSpecificData reader aux module."""
|
|
2
2
|
|
|
3
3
|
import itertools
|
|
4
|
-
from typing import Any, Dict
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
@@ -40,23 +40,22 @@ def _reading_str_series(dataset: Dict[str, Any]) -> pd.DataFrame:
|
|
|
40
40
|
return df
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
def _reading_group_data(dataset: Dict[str, Any]) -> pd.DataFrame:
|
|
43
|
+
def _reading_group_data(dataset: Dict[str, Any]) -> List[pd.DataFrame]:
|
|
44
44
|
# Structure Specific Group Data
|
|
45
|
-
|
|
46
|
-
df = None
|
|
45
|
+
group_dfs = []
|
|
47
46
|
dataset[GROUP] = add_list(dataset[GROUP])
|
|
48
47
|
for data in dataset[GROUP]:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
test_list, df = __process_df(test_list, df, is_end=True)
|
|
48
|
+
group_dict = dict(data.items())
|
|
49
|
+
group_df = pd.DataFrame([group_dict])
|
|
52
50
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
51
|
+
# Remove :type columns
|
|
52
|
+
cols_to_delete = [x for x in group_df.columns if ":type" in x]
|
|
53
|
+
for x in cols_to_delete:
|
|
54
|
+
del group_df[x]
|
|
56
55
|
|
|
57
|
-
|
|
56
|
+
group_dfs.append(group_df)
|
|
58
57
|
|
|
59
|
-
return
|
|
58
|
+
return group_dfs
|
|
60
59
|
|
|
61
60
|
|
|
62
61
|
def _get_at_att_str(dataset: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -76,11 +75,34 @@ def _parse_structure_specific_data(
|
|
|
76
75
|
# Structure Specific Series
|
|
77
76
|
df = _reading_str_series(dataset)
|
|
78
77
|
if GROUP in dataset:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
group_dfs = _reading_group_data(dataset)
|
|
79
|
+
original_columns = df.columns.tolist()
|
|
80
|
+
for group_df in group_dfs:
|
|
81
|
+
# Find non-NaN columns in this group
|
|
82
|
+
non_nan_cols = [
|
|
83
|
+
col
|
|
84
|
+
for col in group_df.columns
|
|
85
|
+
if not group_df[col].isna().all()
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
# Merge keys are intersection of original and non-NaN cols
|
|
89
|
+
merge_cols = list(
|
|
90
|
+
set(original_columns).intersection(set(non_nan_cols))
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
group_df = group_df.drop_duplicates(merge_cols, keep="first")
|
|
94
|
+
df = pd.merge(
|
|
95
|
+
df,
|
|
96
|
+
group_df,
|
|
97
|
+
on=merge_cols,
|
|
98
|
+
how="left",
|
|
99
|
+
suffixes=("", "_drop"),
|
|
100
|
+
)
|
|
101
|
+
for col in list(df.columns):
|
|
102
|
+
if col.endswith("_drop"):
|
|
103
|
+
original = col[:-5]
|
|
104
|
+
df[original] = df[original].fillna(df[col])
|
|
105
|
+
df.drop(col, axis=1, inplace=True)
|
|
84
106
|
elif OBS in dataset:
|
|
85
107
|
dataset[OBS] = add_list(dataset[OBS])
|
|
86
108
|
# Structure Specific All dimensions
|