pysdmx 1.5.2__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysdmx/__init__.py +1 -1
- pysdmx/api/fmr/__init__.py +8 -3
- pysdmx/api/fmr/maintenance.py +158 -0
- pysdmx/api/qb/structure.py +1 -0
- pysdmx/api/qb/util.py +1 -0
- pysdmx/io/csv/__csv_aux_reader.py +99 -0
- pysdmx/io/csv/__csv_aux_writer.py +118 -0
- pysdmx/io/csv/sdmx10/reader/__init__.py +9 -14
- pysdmx/io/csv/sdmx10/writer/__init__.py +28 -2
- pysdmx/io/csv/sdmx20/__init__.py +0 -9
- pysdmx/io/csv/sdmx20/reader/__init__.py +8 -61
- pysdmx/io/csv/sdmx20/writer/__init__.py +32 -25
- pysdmx/io/csv/sdmx21/__init__.py +1 -0
- pysdmx/io/csv/sdmx21/reader/__init__.py +86 -0
- pysdmx/io/csv/sdmx21/writer/__init__.py +70 -0
- pysdmx/io/format.py +8 -0
- pysdmx/io/input_processor.py +20 -6
- pysdmx/io/json/fusion/messages/code.py +21 -4
- pysdmx/io/json/fusion/messages/concept.py +10 -8
- pysdmx/io/json/fusion/messages/dataflow.py +8 -1
- pysdmx/io/json/fusion/messages/dsd.py +15 -0
- pysdmx/io/json/fusion/messages/schema.py +8 -1
- pysdmx/io/json/sdmxjson2/messages/agency.py +43 -7
- pysdmx/io/json/sdmxjson2/messages/category.py +92 -7
- pysdmx/io/json/sdmxjson2/messages/code.py +265 -22
- pysdmx/io/json/sdmxjson2/messages/concept.py +75 -13
- pysdmx/io/json/sdmxjson2/messages/constraint.py +5 -5
- pysdmx/io/json/sdmxjson2/messages/core.py +121 -14
- pysdmx/io/json/sdmxjson2/messages/dataflow.py +63 -8
- pysdmx/io/json/sdmxjson2/messages/dsd.py +215 -20
- pysdmx/io/json/sdmxjson2/messages/map.py +200 -24
- pysdmx/io/json/sdmxjson2/messages/pa.py +36 -5
- pysdmx/io/json/sdmxjson2/messages/provider.py +35 -7
- pysdmx/io/json/sdmxjson2/messages/report.py +85 -7
- pysdmx/io/json/sdmxjson2/messages/schema.py +11 -12
- pysdmx/io/json/sdmxjson2/messages/structure.py +150 -2
- pysdmx/io/json/sdmxjson2/messages/vtl.py +547 -17
- pysdmx/io/json/sdmxjson2/reader/metadata.py +32 -0
- pysdmx/io/json/sdmxjson2/reader/structure.py +32 -0
- pysdmx/io/json/sdmxjson2/writer/__init__.py +9 -0
- pysdmx/io/json/sdmxjson2/writer/metadata.py +60 -0
- pysdmx/io/json/sdmxjson2/writer/structure.py +61 -0
- pysdmx/io/reader.py +28 -9
- pysdmx/io/serde.py +17 -0
- pysdmx/io/writer.py +45 -9
- pysdmx/io/xml/__ss_aux_reader.py +1 -2
- pysdmx/io/xml/__structure_aux_reader.py +15 -10
- pysdmx/io/xml/__structure_aux_writer.py +15 -13
- pysdmx/io/xml/__write_data_aux.py +6 -57
- pysdmx/io/xml/__write_structure_specific_aux.py +7 -3
- pysdmx/io/xml/doc_validation.py +1 -3
- pysdmx/io/xml/sdmx21/writer/generic.py +6 -4
- pysdmx/model/__init__.py +1 -3
- pysdmx/model/code.py +11 -1
- pysdmx/model/dataflow.py +23 -0
- pysdmx/model/map.py +19 -13
- pysdmx/model/message.py +10 -5
- pysdmx/toolkit/pd/_data_utils.py +99 -0
- pysdmx/toolkit/vtl/_validations.py +2 -3
- {pysdmx-1.5.2.dist-info → pysdmx-1.7.0.dist-info}/METADATA +4 -3
- {pysdmx-1.5.2.dist-info → pysdmx-1.7.0.dist-info}/RECORD +63 -51
- {pysdmx-1.5.2.dist-info → pysdmx-1.7.0.dist-info}/WHEEL +1 -1
- {pysdmx-1.5.2.dist-info → pysdmx-1.7.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
"""SDMX 2.0 CSV writer module."""
|
|
2
2
|
|
|
3
|
-
from copy import copy
|
|
4
3
|
from pathlib import Path
|
|
5
|
-
from typing import Optional, Sequence, Union
|
|
4
|
+
from typing import Literal, Optional, Sequence, Union
|
|
6
5
|
|
|
7
6
|
import pandas as pd
|
|
8
7
|
|
|
9
|
-
from pysdmx.io.csv.
|
|
8
|
+
from pysdmx.io.csv.__csv_aux_writer import (
|
|
9
|
+
_write_csv_2_aux,
|
|
10
|
+
)
|
|
10
11
|
from pysdmx.io.pd import PandasDataset
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def write(
|
|
14
15
|
datasets: Sequence[PandasDataset],
|
|
16
|
+
labels: Optional[Literal["name", "id", "both"]] = None,
|
|
17
|
+
time_format: Optional[Literal["original", "normalized"]] = None,
|
|
18
|
+
keys: Optional[Literal["obs", "series", "both"]] = None,
|
|
15
19
|
output_path: Optional[Union[str, Path]] = None,
|
|
16
20
|
) -> Optional[str]:
|
|
17
21
|
"""Write data to SDMX-CSV 2.0 format.
|
|
@@ -19,6 +23,25 @@ def write(
|
|
|
19
23
|
Args:
|
|
20
24
|
datasets: List of datasets to write.
|
|
21
25
|
Must have the same components.
|
|
26
|
+
labels: How to write the name of the columns.
|
|
27
|
+
If None, only the IDs are written.
|
|
28
|
+
if "id", the names are written as ID only.
|
|
29
|
+
If "name", a column called "STRUCTURE_NAME" is
|
|
30
|
+
added after structure ID.
|
|
31
|
+
If "both", the names are written as id:Name.
|
|
32
|
+
time_format: How to write the time period.
|
|
33
|
+
If None, the time period is not modified.
|
|
34
|
+
If "original", the time period is written as it
|
|
35
|
+
is in the dataset.
|
|
36
|
+
"normalized" is not implemented yet.
|
|
37
|
+
keys: to write or not the keys columns
|
|
38
|
+
If None, no keys are written.
|
|
39
|
+
If "obs", the keys are written as a single
|
|
40
|
+
column called "OBS_KEY".
|
|
41
|
+
If "series", the keys are written as a single
|
|
42
|
+
column called "SERIES_KEY".
|
|
43
|
+
If "both", the keys are written as two columns:
|
|
44
|
+
"OBS_KEY" and "SERIES_KEY".
|
|
22
45
|
output_path: Path to write the data to.
|
|
23
46
|
If None, the data is returned as a string.
|
|
24
47
|
|
|
@@ -28,28 +51,12 @@ def write(
|
|
|
28
51
|
# Link to pandas.to_csv documentation on sphinx:
|
|
29
52
|
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html
|
|
30
53
|
|
|
31
|
-
dataframes =
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
for k, v in dataset.attributes.items():
|
|
38
|
-
df[k] = v
|
|
39
|
-
|
|
40
|
-
structure_ref, unique_id = dataset.short_urn.split("=", maxsplit=1)
|
|
41
|
-
if structure_ref in ["DataStructure", "Dataflow"]:
|
|
42
|
-
structure_ref = structure_ref.lower()
|
|
43
|
-
else:
|
|
44
|
-
structure_ref = "dataprovision"
|
|
45
|
-
|
|
46
|
-
# Insert two columns at the beginning of the data set
|
|
47
|
-
df.insert(0, "STRUCTURE", structure_ref)
|
|
48
|
-
df.insert(1, "STRUCTURE_ID", unique_id)
|
|
49
|
-
action_value = SDMX_CSV_ACTION_MAPPER[dataset.action]
|
|
50
|
-
df.insert(2, "ACTION", action_value)
|
|
51
|
-
|
|
52
|
-
dataframes.append(df)
|
|
54
|
+
dataframes = _write_csv_2_aux(
|
|
55
|
+
datasets,
|
|
56
|
+
labels,
|
|
57
|
+
time_format,
|
|
58
|
+
keys,
|
|
59
|
+
)
|
|
53
60
|
|
|
54
61
|
all_data = pd.concat(dataframes, ignore_index=True, axis=0)
|
|
55
62
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""SDMX 2.1 CSV reader and writer."""
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""SDMX 2.1 CSV reader module."""
|
|
2
|
+
|
|
3
|
+
from io import StringIO
|
|
4
|
+
from typing import Sequence
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from pysdmx.errors import Invalid
|
|
9
|
+
from pysdmx.io.csv.__csv_aux_reader import __generate_dataset_from_sdmx_csv
|
|
10
|
+
from pysdmx.io.pd import PandasDataset
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def read(input_str: str) -> Sequence[PandasDataset]:
    """Read SDMX-CSV 2.1 data and return one dataset per structure.

    The CSV text is loaded into a DataFrame, columns without any value are
    dropped, every value is converted to a string and label suffixes
    (``ID: Name``) are stripped from both the headers and the values. The
    rows are then grouped by ``STRUCTURE`` and ``STRUCTURE_ID`` and each
    group is turned into a dataset.

    Args:
        input_str: The SDMX-CSV content, as a string.

    Returns:
        A sequence of Pandas Datasets, one per distinct
        (``STRUCTURE``, ``STRUCTURE_ID``) pair found in the input.

    Raises:
        Invalid: If the ``STRUCTURE`` or ``STRUCTURE_ID`` column is missing.
    """
    # Only the empty string is treated as a missing value, so that literal
    # values such as "NA" are kept as data.
    df_csv = pd.read_csv(
        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop the columns that do not hold any value
    df_csv = df_csv.dropna(axis=1, how="all")

    # Both structure columns are required in this flavour of SDMX-CSV
    if (
        "STRUCTURE" not in df_csv.columns
        or "STRUCTURE_ID" not in df_csv.columns
    ):
        raise Invalid(
            "Only SDMX-CSV 2.1 is allowed",
            "Invalid SDMX-CSV 2.1 file. "
            "Check the docs for the proper structure on content.",
        )

    # Convert all columns to strings, mapping missing values to ""
    df_csv = df_csv.astype(str).replace({"nan": "", "<NA>": ""})
    # A ':' in any header indicates that labels are present ("ID: Name")
    mode_label_text = any(":" in header for header in df_csv.columns)
    # Keep only the part before the first ':' of every value.
    # NOTE(review): this also truncates values that legitimately contain
    # a ':' (e.g. a timestamp with hours and minutes) - confirm intended.
    for col in df_csv.columns[2:]:
        df_csv[col] = (
            df_csv[col].astype(str).map(lambda value: value.split(":", 1)[0])
        )

    id_column = "STRUCTURE_ID"
    if mode_label_text:
        # Remove the label from the structure ID
        df_csv[id_column] = df_csv[id_column].map(lambda x: x.split(": ")[0])
        # Rename the labelled columns to their bare ID
        sequence = 3
        for header in df_csv.columns[sequence:]:
            component_id = header.split(":")[0]
            df_csv[component_id] = df_csv[header].map(
                lambda value: value.split(": ", 2)[0], na_action="ignore"
            )
            # Only drop the source column when it was actually renamed.
            # A header without ':' maps onto itself, and deleting it
            # would silently discard that column from the data.
            if component_id != header:
                del df_csv[header]

    # Separate the SDMX-CSV into one DataFrame per structure
    grouping_columns = ["STRUCTURE", "STRUCTURE_ID"]
    list_df = [data for _, data in df_csv.groupby(grouping_columns)]

    # Generate one dataset from each subset of the DataFrame
    return [
        __generate_dataset_from_sdmx_csv(data=df, references_21=True)
        for df in list_df
    ]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""SDMX 2.1 CSV writer module."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Literal, Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from pysdmx.io.csv.__csv_aux_writer import (
|
|
9
|
+
_write_csv_2_aux,
|
|
10
|
+
)
|
|
11
|
+
from pysdmx.io.pd import PandasDataset
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def write(
    datasets: Sequence[PandasDataset],
    labels: Optional[Literal["name", "id", "both"]] = None,
    time_format: Optional[Literal["original", "normalized"]] = None,
    keys: Optional[Literal["obs", "series", "both"]] = None,
    output_path: Optional[Union[str, Path]] = None,
) -> Optional[str]:
    """Serialize datasets as SDMX-CSV 2.1.

    Args:
        datasets: The datasets to write.
            They must all have the same components.
        labels: How the names of the columns are written.
            If None, only the IDs are written.
            If "id", the names are written as ID only.
            If "name", a column called "STRUCTURE_NAME" is
            added after the structure ID.
            If "both", the names are written as id:Name.
        time_format: How the time period is written.
            If None, the time period is not modified.
            If "original", the time period is written as it
            is in the dataset.
            "normalized" is not implemented yet.
        keys: Whether the key columns are written.
            If None, no keys are written.
            If "obs", the keys are written as a single
            column called "OBS_KEY".
            If "series", the keys are written as a single
            column called "SERIES_KEY".
            If "both", the keys are written as two columns:
            "OBS_KEY" and "SERIES_KEY".
        output_path: Where to write the data.
            If None (or an empty string), the data is returned
            as a string instead.

    Returns:
        The SDMX-CSV data as a string when no output path is given,
        None otherwise.
    """
    # pandas.DataFrame.to_csv documentation:
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html

    # One DataFrame per dataset, laid out by the shared SDMX-CSV helper
    frames = _write_csv_2_aux(
        datasets, labels, time_format, keys, references_21=True
    )
    combined = pd.concat(frames, ignore_index=True, axis=0)

    # Everything is written as text; missing values become empty cells
    combined = combined.astype(str).replace({"nan": "", "<NA>": ""})

    # An empty string is not a usable path: fall back to returning the text
    if isinstance(output_path, str) and output_path == "":
        output_path = None

    return combined.to_csv(output_path, index=False, header=True)
|
pysdmx/io/format.py
CHANGED
|
@@ -10,8 +10,10 @@ class Format(Enum):
|
|
|
10
10
|
|
|
11
11
|
DATA_SDMX_CSV_1_0_0 = f"{_BASE}data+csv;version=1.0.0"
|
|
12
12
|
DATA_SDMX_CSV_2_0_0 = f"{_BASE}data+csv;version=2.0.0"
|
|
13
|
+
DATA_SDMX_CSV_2_1_0 = f"{_BASE}data+csv;version=2.1.0"
|
|
13
14
|
DATA_SDMX_JSON_1_0_0 = f"{_BASE}data+json;version=1.0.0"
|
|
14
15
|
DATA_SDMX_JSON_2_0_0 = f"{_BASE}data+json;version=2.0.0"
|
|
16
|
+
DATA_SDMX_JSON_2_1_0 = f"{_BASE}data+json;version=2.1.0"
|
|
15
17
|
DATA_SDMX_ML_2_1_GEN = f"{_BASE}genericdata+xml;version=2.1"
|
|
16
18
|
DATA_SDMX_ML_2_1_STR = f"{_BASE}structurespecificdata+xml;version=2.1"
|
|
17
19
|
DATA_SDMX_ML_2_1_GENTS = f"{_BASE}generictimeseriesdata+xml;version=2.1"
|
|
@@ -22,7 +24,9 @@ class Format(Enum):
|
|
|
22
24
|
DATA_SDMX_ML_3_1 = f"{_BASE}data+xml;version=3.1.0"
|
|
23
25
|
GDS_JSON = "application/json"
|
|
24
26
|
REFMETA_SDMX_CSV_2_0_0 = f"{_BASE}metadata+csv;version=2.0.0"
|
|
27
|
+
REFMETA_SDMX_CSV_2_1_0 = f"{_BASE}metadata+csv;version=2.1.0"
|
|
25
28
|
REFMETA_SDMX_JSON_2_0_0 = f"{_BASE}metadata+json;version=2.0.0"
|
|
29
|
+
REFMETA_SDMX_JSON_2_1_0 = f"{_BASE}metadata+json;version=2.1.0"
|
|
26
30
|
REFMETA_SDMX_ML_3_0 = f"{_BASE}metadata+xml;version=3.0.0"
|
|
27
31
|
REFMETA_SDMX_ML_3_1 = f"{_BASE}metadata+xml;version=3.1.0"
|
|
28
32
|
REGISTRY_SDMX_ML_2_1 = f"{_BASE}registry+xml;version=2.1"
|
|
@@ -30,11 +34,13 @@ class Format(Enum):
|
|
|
30
34
|
REGISTRY_SDMX_ML_3_1 = f"{_BASE}registry+xml;version=3.1.0"
|
|
31
35
|
SCHEMA_SDMX_JSON_1_0_0 = f"{_BASE}schema+json;version=1.0.0"
|
|
32
36
|
SCHEMA_SDMX_JSON_2_0_0 = f"{_BASE}schema+json;version=2.0.0"
|
|
37
|
+
SCHEMA_SDMX_JSON_2_1_0 = f"{_BASE}schema+json;version=2.1.0"
|
|
33
38
|
SCHEMA_SDMX_ML_2_1 = f"{_BASE}schema+xml;version=2.1"
|
|
34
39
|
SCHEMA_SDMX_ML_3_0 = f"{_BASE}schema+xml;version=3.0.0"
|
|
35
40
|
SCHEMA_SDMX_ML_3_1 = f"{_BASE}schema+xml;version=3.1.0"
|
|
36
41
|
STRUCTURE_SDMX_JSON_1_0_0 = f"{_BASE}structure+json;version=1.0.0"
|
|
37
42
|
STRUCTURE_SDMX_JSON_2_0_0 = f"{_BASE}structure+json;version=2.0.0"
|
|
43
|
+
STRUCTURE_SDMX_JSON_2_1_0 = f"{_BASE}structure+json;version=2.1.0"
|
|
38
44
|
STRUCTURE_SDMX_ML_2_1 = f"{_BASE}structure+xml;version=2.1"
|
|
39
45
|
STRUCTURE_SDMX_ML_3_0 = f"{_BASE}structure+xml;version=3.0.0"
|
|
40
46
|
STRUCTURE_SDMX_ML_3_1 = f"{_BASE}structure+xml;version=3.1.0"
|
|
@@ -57,6 +63,7 @@ class DataFormat(Enum):
|
|
|
57
63
|
|
|
58
64
|
SDMX_CSV_1_0_0 = Format.DATA_SDMX_CSV_1_0_0.value
|
|
59
65
|
SDMX_CSV_2_0_0 = Format.DATA_SDMX_CSV_2_0_0.value
|
|
66
|
+
SDMX_CSV_2_1_0 = Format.DATA_SDMX_CSV_2_1_0.value
|
|
60
67
|
SDMX_JSON_1_0_0 = Format.DATA_SDMX_JSON_1_0_0.value
|
|
61
68
|
SDMX_JSON_2_0_0 = Format.DATA_SDMX_JSON_2_0_0.value
|
|
62
69
|
SDMX_ML_2_1_GEN = Format.DATA_SDMX_ML_2_1_GEN.value
|
|
@@ -71,6 +78,7 @@ class RefMetaFormat(Enum):
|
|
|
71
78
|
"""The SDMX Reference Metadata formats."""
|
|
72
79
|
|
|
73
80
|
SDMX_CSV_2_0_0 = Format.REFMETA_SDMX_CSV_2_0_0.value
|
|
81
|
+
SDMX_CSV_2_1_0 = Format.REFMETA_SDMX_CSV_2_1_0.value
|
|
74
82
|
SDMX_JSON_2_0_0 = Format.REFMETA_SDMX_JSON_2_0_0.value
|
|
75
83
|
SDMX_ML_3_0 = Format.REFMETA_SDMX_ML_3_0.value
|
|
76
84
|
SDMX_ML_3_1 = Format.REFMETA_SDMX_ML_3_1.value
|
pysdmx/io/input_processor.py
CHANGED
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
import csv
|
|
4
4
|
import os.path
|
|
5
5
|
from io import BytesIO, StringIO, TextIOWrapper
|
|
6
|
-
from json import JSONDecodeError, loads
|
|
7
6
|
from os import PathLike
|
|
8
7
|
from pathlib import Path
|
|
9
8
|
from typing import Optional, Tuple, Union
|
|
10
9
|
|
|
10
|
+
import msgspec
|
|
11
11
|
from httpx import Client as httpx_Client
|
|
12
12
|
from httpx import HTTPStatusError, create_ssl_context
|
|
13
13
|
|
|
@@ -29,7 +29,7 @@ def __check_xml(input_str: str) -> bool:
|
|
|
29
29
|
|
|
30
30
|
def __check_csv(input_str: str) -> bool:
|
|
31
31
|
try:
|
|
32
|
-
max_length =
|
|
32
|
+
max_length = min(2048, len(input_str))
|
|
33
33
|
dialect = csv.Sniffer().sniff(input_str[:max_length])
|
|
34
34
|
control_csv_format = (
|
|
35
35
|
dialect.delimiter == "," and dialect.quotechar == '"'
|
|
@@ -47,9 +47,9 @@ def __check_csv(input_str: str) -> bool:
|
|
|
47
47
|
|
|
48
48
|
def __check_json(input_str: str) -> bool:
|
|
49
49
|
try:
|
|
50
|
-
|
|
50
|
+
msgspec.json.decode(input_str)
|
|
51
51
|
return True
|
|
52
|
-
except
|
|
52
|
+
except msgspec.DecodeError:
|
|
53
53
|
return False
|
|
54
54
|
|
|
55
55
|
|
|
@@ -84,10 +84,24 @@ def __get_sdmx_csv_flavour(input_str: str) -> Tuple[str, Format]:
|
|
|
84
84
|
if "DATAFLOW" in headers:
|
|
85
85
|
return input_str, Format.DATA_SDMX_CSV_1_0_0
|
|
86
86
|
elif "STRUCTURE" in headers and "STRUCTURE_ID" in headers:
|
|
87
|
-
return input_str, Format.
|
|
87
|
+
return input_str, Format.DATA_SDMX_CSV_2_1_0
|
|
88
88
|
raise Invalid("Validation Error", "Cannot parse input as SDMX-CSV.")
|
|
89
89
|
|
|
90
90
|
|
|
91
|
+
def __get_sdmx_json_flavour(input_str: str) -> Tuple[str, Format]:
    """Infer the SDMX-JSON flavour from the beginning of the message.

    Only the first 400 characters are inspected, case-insensitively, for a
    reference to one of the supported SDMX-JSON 2.0.0 schemas.

    Args:
        input_str: The JSON message, as a string.

    Returns:
        The unchanged input string along with the detected format.

    Raises:
        NotImplemented: If the message mentions SDMX-JSON but none of the
            supported schemas.
        Invalid: If the message does not mention SDMX-JSON at all.
    """
    head = input_str[:400].lower()

    # Checked in order: structure messages first, then reference metadata
    supported = (
        (
            "2.0.0/sdmx-json-structure-schema.json",
            Format.STRUCTURE_SDMX_JSON_2_0_0,
        ),
        (
            "2.0.0/sdmx-json-metadata-schema.json",
            Format.REFMETA_SDMX_JSON_2_0_0,
        ),
    )
    for schema_marker, detected in supported:
        if schema_marker in head:
            return input_str, detected

    if "sdmx-json" in head:
        # NOTE(review): assumes `NotImplemented` is the project's exception
        # class imported at module level (shadowing the builtin constant,
        # which is not callable) - confirm against the file's imports.
        raise NotImplemented(
            "Unsupported format", "This flavour of SDMX-JSON is not supported."
        )

    raise Invalid("Validation Error", "Cannot parse input as SDMX-JSON.")
|
|
103
|
+
|
|
104
|
+
|
|
91
105
|
def __check_sdmx_str(input_str: str) -> Tuple[str, Format]:
|
|
92
106
|
"""Attempts to infer the SDMX format of the input string."""
|
|
93
107
|
if __check_xml(input_str):
|
|
@@ -95,7 +109,7 @@ def __check_sdmx_str(input_str: str) -> Tuple[str, Format]:
|
|
|
95
109
|
if __check_csv(input_str):
|
|
96
110
|
return __get_sdmx_csv_flavour(input_str)
|
|
97
111
|
if __check_json(input_str):
|
|
98
|
-
|
|
112
|
+
return __get_sdmx_json_flavour(input_str)
|
|
99
113
|
raise Invalid("Validation Error", "Cannot parse input as SDMX.")
|
|
100
114
|
|
|
101
115
|
|
|
@@ -12,6 +12,7 @@ from pysdmx.io.json.fusion.messages.core import (
|
|
|
12
12
|
FusionString,
|
|
13
13
|
)
|
|
14
14
|
from pysdmx.model import (
|
|
15
|
+
Annotation,
|
|
15
16
|
Code,
|
|
16
17
|
HierarchicalCode,
|
|
17
18
|
)
|
|
@@ -31,6 +32,7 @@ class FusionCode(Struct, frozen=True):
|
|
|
31
32
|
"""Fusion-JSON payload for codes."""
|
|
32
33
|
|
|
33
34
|
id: str
|
|
35
|
+
urn: Optional[str] = None
|
|
34
36
|
annotations: Sequence[FusionAnnotation] = ()
|
|
35
37
|
names: Sequence[FusionString] = ()
|
|
36
38
|
descriptions: Sequence[FusionString] = ()
|
|
@@ -49,10 +51,11 @@ class FusionCode(Struct, frozen=True):
|
|
|
49
51
|
valid_to = self.__handle_date(vals[1]) if vals[1] else None
|
|
50
52
|
return (valid_from, valid_to)
|
|
51
53
|
|
|
52
|
-
def to_model(self) -> Code:
|
|
54
|
+
def to_model(self, extract_urn: bool = False) -> Code:
|
|
53
55
|
"""Converts a FusionCode to a standard code."""
|
|
54
56
|
vp = [a for a in self.annotations if a.type == "FR_VALIDITY_PERIOD"]
|
|
55
57
|
vf, vt = self.__get_val(vp[0]) if vp else (None, None)
|
|
58
|
+
urn = self.urn if extract_urn else None
|
|
56
59
|
return Code(
|
|
57
60
|
id=self.id,
|
|
58
61
|
name=self.names[0].value,
|
|
@@ -61,6 +64,7 @@ class FusionCode(Struct, frozen=True):
|
|
|
61
64
|
),
|
|
62
65
|
valid_from=vf,
|
|
63
66
|
valid_to=vt,
|
|
67
|
+
urn=urn,
|
|
64
68
|
)
|
|
65
69
|
|
|
66
70
|
|
|
@@ -75,7 +79,7 @@ class FusionCodelist(Struct, frozen=True, rename={"agency": "agencyId"}):
|
|
|
75
79
|
version: str = "1.0"
|
|
76
80
|
items: Sequence[FusionCode] = ()
|
|
77
81
|
|
|
78
|
-
def to_model(self) -> CL:
|
|
82
|
+
def to_model(self, extract_urns: bool = False) -> CL:
|
|
79
83
|
"""Converts a JsonCodelist to a standard codelist."""
|
|
80
84
|
t = "codelist" if "Codelist" in self.urn else "valuelist"
|
|
81
85
|
return CL(
|
|
@@ -86,7 +90,7 @@ class FusionCodelist(Struct, frozen=True, rename={"agency": "agencyId"}):
|
|
|
86
90
|
self.descriptions[0].value if self.descriptions else None
|
|
87
91
|
),
|
|
88
92
|
version=self.version,
|
|
89
|
-
items=[i.to_model() for i in self.items],
|
|
93
|
+
items=[i.to_model(extract_urns) for i in self.items],
|
|
90
94
|
sdmx_type=t, # type: ignore[arg-type]
|
|
91
95
|
)
|
|
92
96
|
|
|
@@ -108,6 +112,7 @@ class FusionCodelistMessage(Struct, frozen=True):
|
|
|
108
112
|
class FusionHierarchicalCode(Struct, frozen=True):
|
|
109
113
|
"""Fusion-JSON payload for hierarchical codes."""
|
|
110
114
|
|
|
115
|
+
id: str
|
|
111
116
|
code: str
|
|
112
117
|
validFrom: Optional[int] = None
|
|
113
118
|
validTo: Optional[int] = None
|
|
@@ -140,6 +145,16 @@ class FusionHierarchicalCode(Struct, frozen=True):
|
|
|
140
145
|
rvf = self.__convert_epoch(self.validFrom) if self.validFrom else None
|
|
141
146
|
rvt = self.__convert_epoch(self.validTo) if self.validTo else None
|
|
142
147
|
codes = [c.to_model(codelists) for c in self.codes]
|
|
148
|
+
if self.id != code.id:
|
|
149
|
+
# The ID of the hierarchical code differs from the ID of the
|
|
150
|
+
# code that the hierarchical code references. We therefore need
|
|
151
|
+
# to store the ID of the hierarchical code, else the information
|
|
152
|
+
# will be lost and we won't be able to write the hierarchy back
|
|
153
|
+
# to the Registry, if requested.
|
|
154
|
+
a = Annotation(id="hcode", type="pysdmx", text=self.id)
|
|
155
|
+
annotations = [a]
|
|
156
|
+
else:
|
|
157
|
+
annotations = []
|
|
143
158
|
return HierarchicalCode(
|
|
144
159
|
code.id,
|
|
145
160
|
code.name,
|
|
@@ -149,6 +164,8 @@ class FusionHierarchicalCode(Struct, frozen=True):
|
|
|
149
164
|
rvf,
|
|
150
165
|
rvt,
|
|
151
166
|
codes,
|
|
167
|
+
tuple(annotations),
|
|
168
|
+
code.urn,
|
|
152
169
|
)
|
|
153
170
|
|
|
154
171
|
|
|
@@ -222,7 +239,7 @@ class FusionHierarchyMessage(Struct, frozen=True):
|
|
|
222
239
|
|
|
223
240
|
def to_model(self) -> HCL:
|
|
224
241
|
"""Returns the requested hierarchy."""
|
|
225
|
-
cls = [cl.to_model() for cl in self.Codelist]
|
|
242
|
+
cls = [cl.to_model(True) for cl in self.Codelist]
|
|
226
243
|
return self.Hierarchy[0].to_model(cls)
|
|
227
244
|
|
|
228
245
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Optional, Sequence
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
import msgspec
|
|
6
6
|
|
|
7
7
|
from pysdmx.io.json.fusion.messages.code import FusionCodelist
|
|
8
8
|
from pysdmx.io.json.fusion.messages.core import (
|
|
@@ -13,10 +13,11 @@ from pysdmx.model.concept import Concept, DataType
|
|
|
13
13
|
from pysdmx.model.concept import ConceptScheme as CS
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
class FusionConcept(Struct, frozen=True):
|
|
16
|
+
class FusionConcept(msgspec.Struct, frozen=True):
|
|
17
17
|
"""Fusion-JSON payload for concepts."""
|
|
18
18
|
|
|
19
19
|
id: str
|
|
20
|
+
urn: str
|
|
20
21
|
names: Sequence[FusionString]
|
|
21
22
|
representation: Optional[FusionRepresentation] = None
|
|
22
23
|
descriptions: Optional[Sequence[FusionString]] = None
|
|
@@ -48,10 +49,13 @@ class FusionConcept(Struct, frozen=True):
|
|
|
48
49
|
description=d,
|
|
49
50
|
codes=c,
|
|
50
51
|
enum_ref=cl_ref,
|
|
52
|
+
urn=self.urn,
|
|
51
53
|
)
|
|
52
54
|
|
|
53
55
|
|
|
54
|
-
class FusionConceptScheme(
|
|
56
|
+
class FusionConceptScheme(
|
|
57
|
+
msgspec.Struct, frozen=True, rename={"agency": "agencyId"}
|
|
58
|
+
):
|
|
55
59
|
"""Fusion-JSON payload for a concept scheme."""
|
|
56
60
|
|
|
57
61
|
id: str
|
|
@@ -64,20 +68,18 @@ class FusionConceptScheme(Struct, frozen=True, rename={"agency": "agencyId"}):
|
|
|
64
68
|
def to_model(self, codelists: Sequence[FusionCodelist]) -> CS:
|
|
65
69
|
"""Converts a FusionConceptScheme to a standard concept scheme."""
|
|
66
70
|
d = self.descriptions[0].value if self.descriptions else None
|
|
71
|
+
concepts = [c.to_model(codelists) for c in self.items]
|
|
67
72
|
return CS(
|
|
68
73
|
id=self.id,
|
|
69
74
|
name=self.names[0].value,
|
|
70
75
|
agency=self.agency,
|
|
71
76
|
description=d,
|
|
72
77
|
version=self.version,
|
|
73
|
-
items=
|
|
78
|
+
items=concepts,
|
|
74
79
|
)
|
|
75
80
|
|
|
76
81
|
|
|
77
|
-
class FusionConceptSchemeMessage(
|
|
78
|
-
Struct,
|
|
79
|
-
frozen=True,
|
|
80
|
-
):
|
|
82
|
+
class FusionConceptSchemeMessage(msgspec.Struct, frozen=True):
|
|
81
83
|
"""Fusion-JSON payload for /conceptscheme queries."""
|
|
82
84
|
|
|
83
85
|
ConceptScheme: Sequence[FusionConceptScheme]
|
|
@@ -15,6 +15,7 @@ from pysdmx.model import (
|
|
|
15
15
|
from pysdmx.model import (
|
|
16
16
|
Dataflow as DF,
|
|
17
17
|
)
|
|
18
|
+
from pysdmx.model.dataflow import Group
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class FusionDataflow(Struct, frozen=True, rename={"agency": "agencyId"}):
|
|
@@ -62,7 +63,12 @@ class FusionDataflowMessage(Struct, frozen=True):
|
|
|
62
63
|
return df.agency == agency and df.id == id_
|
|
63
64
|
|
|
64
65
|
def to_model(
|
|
65
|
-
self,
|
|
66
|
+
self,
|
|
67
|
+
components: Components,
|
|
68
|
+
grps: Optional[Sequence[Group]],
|
|
69
|
+
agency: str,
|
|
70
|
+
id_: str,
|
|
71
|
+
version: str,
|
|
66
72
|
) -> DataflowInfo:
|
|
67
73
|
"""Returns the requested dataflow details."""
|
|
68
74
|
prvs: List[DataProvider] = []
|
|
@@ -83,6 +89,7 @@ class FusionDataflowMessage(Struct, frozen=True):
|
|
|
83
89
|
version=df.version,
|
|
84
90
|
providers=prvs,
|
|
85
91
|
dsd_ref=df.dataStructureRef,
|
|
92
|
+
groups=grps,
|
|
86
93
|
)
|
|
87
94
|
|
|
88
95
|
|
|
@@ -110,6 +110,10 @@ class FusionAttribute(Struct, frozen=True):
|
|
|
110
110
|
)
|
|
111
111
|
lvl = self.__derive_level(groups)
|
|
112
112
|
desc = c.descriptions[0].value if c.descriptions else None
|
|
113
|
+
if self.representation and self.representation.representation:
|
|
114
|
+
local_enum_ref = self.representation.representation
|
|
115
|
+
else:
|
|
116
|
+
local_enum_ref = None
|
|
113
117
|
return Component(
|
|
114
118
|
id=self.id,
|
|
115
119
|
required=self.mandatory,
|
|
@@ -122,6 +126,7 @@ class FusionAttribute(Struct, frozen=True):
|
|
|
122
126
|
local_codes=codes,
|
|
123
127
|
attachment_level=lvl,
|
|
124
128
|
array_def=ab,
|
|
129
|
+
local_enum_ref=local_enum_ref,
|
|
125
130
|
)
|
|
126
131
|
|
|
127
132
|
|
|
@@ -160,6 +165,10 @@ class FusionDimension(Struct, frozen=True):
|
|
|
160
165
|
self.id, self.representation, cls, cons
|
|
161
166
|
)
|
|
162
167
|
desc = c.descriptions[0].value if c.descriptions else None
|
|
168
|
+
if self.representation and self.representation.representation:
|
|
169
|
+
local_enum_ref = self.representation.representation
|
|
170
|
+
else:
|
|
171
|
+
local_enum_ref = None
|
|
163
172
|
return Component(
|
|
164
173
|
id=self.id,
|
|
165
174
|
required=True,
|
|
@@ -171,6 +180,7 @@ class FusionDimension(Struct, frozen=True):
|
|
|
171
180
|
description=desc,
|
|
172
181
|
local_codes=codes,
|
|
173
182
|
array_def=ab,
|
|
183
|
+
local_enum_ref=local_enum_ref,
|
|
174
184
|
)
|
|
175
185
|
|
|
176
186
|
|
|
@@ -209,6 +219,10 @@ class FusionMeasure(Struct, frozen=True):
|
|
|
209
219
|
self.id, self.representation, cls, cons
|
|
210
220
|
)
|
|
211
221
|
desc = c.descriptions[0].value if c.descriptions else None
|
|
222
|
+
if self.representation and self.representation.representation:
|
|
223
|
+
local_enum_ref = self.representation.representation
|
|
224
|
+
else:
|
|
225
|
+
local_enum_ref = None
|
|
212
226
|
return Component(
|
|
213
227
|
id=self.id,
|
|
214
228
|
required=self.mandatory,
|
|
@@ -220,6 +234,7 @@ class FusionMeasure(Struct, frozen=True):
|
|
|
220
234
|
description=desc,
|
|
221
235
|
local_codes=codes,
|
|
222
236
|
array_def=ab,
|
|
237
|
+
local_enum_ref=local_enum_ref,
|
|
223
238
|
)
|
|
224
239
|
|
|
225
240
|
|
|
@@ -10,6 +10,7 @@ from pysdmx.io.json.fusion.messages.constraint import FusionContentConstraint
|
|
|
10
10
|
from pysdmx.io.json.fusion.messages.core import FusionLink
|
|
11
11
|
from pysdmx.io.json.fusion.messages.dsd import FusionDataStructure
|
|
12
12
|
from pysdmx.model import Components, HierarchyAssociation, Schema
|
|
13
|
+
from pysdmx.model.dataflow import Group
|
|
13
14
|
from pysdmx.util import parse_item_urn
|
|
14
15
|
|
|
15
16
|
|
|
@@ -44,6 +45,7 @@ class FusionSchemaMessage(msgspec.Struct, frozen=True):
|
|
|
44
45
|
components = self.DataStructure[0].get_components(
|
|
45
46
|
self.ConceptScheme, cls, self.DataConstraint
|
|
46
47
|
)
|
|
48
|
+
grps = self.DataStructure[0].groups
|
|
47
49
|
comp_dict = {c.id: c for c in components}
|
|
48
50
|
urns = [a.urn for a in self.meta.links]
|
|
49
51
|
for ha in hierarchies:
|
|
@@ -60,4 +62,9 @@ class FusionSchemaMessage(msgspec.Struct, frozen=True):
|
|
|
60
62
|
f"{h.agency}:{h.id}({h.version})" # type: ignore[union-attr]
|
|
61
63
|
)
|
|
62
64
|
comps = Components(comp_dict.values())
|
|
63
|
-
|
|
65
|
+
mapped_grps = [
|
|
66
|
+
Group(g.id, dimensions=g.dimensionReferences) for g in grps
|
|
67
|
+
]
|
|
68
|
+
return Schema(
|
|
69
|
+
context, agency, id_, comps, version, urns, groups=mapped_grps
|
|
70
|
+
)
|