pysdmx 1.3.0__py3-none-any.whl → 1.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysdmx/__extras_check.py +3 -2
- pysdmx/__init__.py +1 -1
- pysdmx/api/fmr/__init__.py +4 -4
- pysdmx/api/gds/__init__.py +328 -0
- pysdmx/api/qb/gds.py +153 -0
- pysdmx/api/qb/service.py +91 -3
- pysdmx/api/qb/structure.py +1 -0
- pysdmx/api/qb/util.py +1 -0
- pysdmx/io/__init__.py +2 -1
- pysdmx/io/csv/sdmx10/reader/__init__.py +4 -2
- pysdmx/io/csv/sdmx10/writer/__init__.py +15 -2
- pysdmx/io/csv/sdmx20/reader/__init__.py +5 -2
- pysdmx/io/csv/sdmx20/writer/__init__.py +13 -2
- pysdmx/io/format.py +4 -0
- pysdmx/io/input_processor.py +12 -3
- pysdmx/io/json/fusion/messages/core.py +2 -0
- pysdmx/io/json/fusion/messages/report.py +13 -7
- pysdmx/io/json/gds/messages/__init__.py +35 -0
- pysdmx/io/json/gds/messages/agencies.py +41 -0
- pysdmx/io/json/gds/messages/catalog.py +79 -0
- pysdmx/io/json/gds/messages/sdmx_api.py +23 -0
- pysdmx/io/json/gds/messages/services.py +49 -0
- pysdmx/io/json/gds/messages/urn_resolver.py +43 -0
- pysdmx/io/json/gds/reader/__init__.py +12 -0
- pysdmx/io/json/sdmxjson2/messages/__init__.py +12 -4
- pysdmx/io/json/sdmxjson2/messages/agency.py +72 -0
- pysdmx/io/json/sdmxjson2/messages/category.py +22 -29
- pysdmx/io/json/sdmxjson2/messages/code.py +68 -64
- pysdmx/io/json/sdmxjson2/messages/concept.py +9 -18
- pysdmx/io/json/sdmxjson2/messages/constraint.py +2 -13
- pysdmx/io/json/sdmxjson2/messages/core.py +113 -21
- pysdmx/io/json/sdmxjson2/messages/dataflow.py +51 -21
- pysdmx/io/json/sdmxjson2/messages/dsd.py +110 -36
- pysdmx/io/json/sdmxjson2/messages/map.py +61 -49
- pysdmx/io/json/sdmxjson2/messages/pa.py +9 -17
- pysdmx/io/json/sdmxjson2/messages/provider.py +88 -0
- pysdmx/io/json/sdmxjson2/messages/report.py +84 -14
- pysdmx/io/json/sdmxjson2/messages/schema.py +14 -5
- pysdmx/io/json/sdmxjson2/messages/structure.py +105 -36
- pysdmx/io/json/sdmxjson2/messages/vtl.py +42 -96
- pysdmx/io/pd.py +2 -9
- pysdmx/io/reader.py +72 -27
- pysdmx/io/serde.py +11 -0
- pysdmx/io/writer.py +134 -0
- pysdmx/io/xml/{sdmx21/reader/__data_aux.py → __data_aux.py} +9 -2
- pysdmx/io/xml/{sdmx21/reader/__parse_xml.py → __parse_xml.py} +30 -6
- pysdmx/io/xml/__ss_aux_reader.py +96 -0
- pysdmx/io/xml/__structure_aux_reader.py +1174 -0
- pysdmx/io/xml/__structure_aux_writer.py +1233 -0
- pysdmx/io/xml/{sdmx21/__tokens.py → __tokens.py} +33 -1
- pysdmx/io/xml/{sdmx21/writer/__write_aux.py → __write_aux.py} +129 -37
- pysdmx/io/xml/{sdmx21/writer/__write_data_aux.py → __write_data_aux.py} +1 -1
- pysdmx/io/xml/__write_structure_specific_aux.py +254 -0
- pysdmx/io/xml/{sdmx21/reader/doc_validation.py → doc_validation.py} +10 -2
- pysdmx/io/xml/{sdmx21/reader/header.py → header.py} +11 -3
- pysdmx/io/xml/sdmx21/reader/error.py +2 -2
- pysdmx/io/xml/sdmx21/reader/generic.py +12 -8
- pysdmx/io/xml/sdmx21/reader/structure.py +5 -840
- pysdmx/io/xml/sdmx21/reader/structure_specific.py +13 -97
- pysdmx/io/xml/sdmx21/reader/submission.py +2 -2
- pysdmx/io/xml/sdmx21/writer/error.py +1 -1
- pysdmx/io/xml/sdmx21/writer/generic.py +13 -7
- pysdmx/io/xml/sdmx21/writer/structure.py +16 -828
- pysdmx/io/xml/sdmx21/writer/structure_specific.py +13 -238
- pysdmx/io/xml/sdmx30/__init__.py +1 -0
- pysdmx/io/xml/sdmx30/reader/__init__.py +1 -0
- pysdmx/io/xml/sdmx30/reader/structure.py +39 -0
- pysdmx/io/xml/sdmx30/reader/structure_specific.py +39 -0
- pysdmx/io/xml/sdmx30/writer/__init__.py +1 -0
- pysdmx/io/xml/sdmx30/writer/structure.py +67 -0
- pysdmx/io/xml/sdmx30/writer/structure_specific.py +108 -0
- pysdmx/model/__base.py +99 -34
- pysdmx/model/__init__.py +4 -0
- pysdmx/model/category.py +20 -0
- pysdmx/model/code.py +29 -8
- pysdmx/model/concept.py +52 -11
- pysdmx/model/dataflow.py +117 -33
- pysdmx/model/dataset.py +66 -14
- pysdmx/model/gds.py +161 -0
- pysdmx/model/map.py +51 -8
- pysdmx/model/message.py +235 -55
- pysdmx/model/metadata.py +79 -16
- pysdmx/model/submission.py +12 -7
- pysdmx/model/vtl.py +30 -13
- pysdmx/toolkit/__init__.py +1 -1
- pysdmx/toolkit/pd/__init__.py +85 -0
- pysdmx/toolkit/vtl/__init__.py +2 -1
- pysdmx/toolkit/vtl/_validations.py +1 -1
- pysdmx/toolkit/vtl/{generate_vtl_script.py → script_generation.py} +30 -4
- pysdmx/toolkit/vtl/validation.py +119 -0
- pysdmx/util/_model_utils.py +1 -1
- pysdmx-1.4.0rc1.dist-info/METADATA +119 -0
- pysdmx-1.4.0rc1.dist-info/RECORD +140 -0
- pysdmx/io/json/sdmxjson2/messages/org.py +0 -140
- pysdmx/toolkit/vtl/model_validations.py +0 -50
- pysdmx-1.3.0.dist-info/METADATA +0 -76
- pysdmx-1.3.0.dist-info/RECORD +0 -116
- /pysdmx/io/xml/{sdmx21/writer/config.py → config.py} +0 -0
- {pysdmx-1.3.0.dist-info → pysdmx-1.4.0rc1.dist-info}/LICENSE +0 -0
- {pysdmx-1.3.0.dist-info → pysdmx-1.4.0rc1.dist-info}/WHEEL +0 -0
|
@@ -12,6 +12,7 @@ REQUIRED = "required"
|
|
|
12
12
|
POSITION = "position"
|
|
13
13
|
CLASS = "class"
|
|
14
14
|
PACKAGE = "package"
|
|
15
|
+
LINK = "Link"
|
|
15
16
|
|
|
16
17
|
# Structure Specific
|
|
17
18
|
STR_SPE = "StructureSpecificData"
|
|
@@ -28,6 +29,7 @@ STR_REF = "structureRef"
|
|
|
28
29
|
STRUCTURE = "Structure"
|
|
29
30
|
STR_USAGE = "StructureUsage"
|
|
30
31
|
PROV_AGREMENT = "ProvisionAgrement"
|
|
32
|
+
PROV_AGREEMENT = "ProvisionAgreement"
|
|
31
33
|
STR_ID = "structureID"
|
|
32
34
|
STR_TYPE = "structure_type"
|
|
33
35
|
DIM_OBS = "dimensionAtObservation"
|
|
@@ -43,6 +45,7 @@ SOURCE = "Source"
|
|
|
43
45
|
HEADER_ID = "ID"
|
|
44
46
|
NAMESPACE = "namespace"
|
|
45
47
|
NAMES = "names"
|
|
48
|
+
REGISTRY_LOW = "registry"
|
|
46
49
|
|
|
47
50
|
|
|
48
51
|
# SDMX Error handling
|
|
@@ -80,9 +83,11 @@ ANNOTATIONS = "Annotations"
|
|
|
80
83
|
ANNOTATIONS_LOW = "annotations"
|
|
81
84
|
STRUCTURES = "Structures"
|
|
82
85
|
ORGS = "OrganisationSchemes"
|
|
83
|
-
AGENCIES = "
|
|
86
|
+
AGENCIES = "AgencySchemes"
|
|
87
|
+
AGENCY_SCHEME = "AgencyScheme"
|
|
84
88
|
CODELISTS = "Codelists"
|
|
85
89
|
CONCEPTS = "Concepts"
|
|
90
|
+
CON_SCHEMES = "ConceptSchemes"
|
|
86
91
|
DSDS = "DataStructures"
|
|
87
92
|
DATAFLOWS = "Dataflows"
|
|
88
93
|
CONSTRAINTS = "Constraints"
|
|
@@ -99,6 +104,7 @@ CS_LOW = "concept_scheme"
|
|
|
99
104
|
CON = "Concept"
|
|
100
105
|
CON_LOW = "concept"
|
|
101
106
|
DSD = "DataStructure"
|
|
107
|
+
DSD_LOW = "datastructure"
|
|
102
108
|
|
|
103
109
|
# DSD components
|
|
104
110
|
DSD_COMPS = "DataStructureComponents"
|
|
@@ -119,17 +125,24 @@ ATT_LVL = "attachment_level"
|
|
|
119
125
|
ATT_REL_LOW = "attribute_relationship"
|
|
120
126
|
ATT_REL = "AttributeRelationship"
|
|
121
127
|
AS_STATUS = "assignmentStatus"
|
|
128
|
+
USAGE = "usage"
|
|
122
129
|
MANDATORY = "Mandatory"
|
|
130
|
+
MANDATORY_LOW = "mandatory"
|
|
123
131
|
CONDITIONAL = "Conditional"
|
|
132
|
+
OPTIONAL_LOW = "optional"
|
|
133
|
+
ME_REL = "MeasureRelationship"
|
|
134
|
+
OBSERVATION = "Observation"
|
|
124
135
|
# Measure
|
|
125
136
|
ME_LIST = "MeasureList"
|
|
126
137
|
ME_LIST_LOW = "measure_list"
|
|
127
138
|
MEASURE = "Measure"
|
|
128
139
|
PRIM_MEASURE = "PrimaryMeasure"
|
|
140
|
+
MEASURE_RELATIONSHIP = "MeasureRelationship"
|
|
129
141
|
# Group Dimension
|
|
130
142
|
GROUP = "Group"
|
|
131
143
|
GROUP_DIM_LOW = "group_dimension_descriptor"
|
|
132
144
|
GROUP_DIM = "GroupDimension"
|
|
145
|
+
ATTACH_GROUP = "AttachmentGroup"
|
|
133
146
|
DIM_REF = "DimensionReference"
|
|
134
147
|
|
|
135
148
|
# Constraints
|
|
@@ -200,6 +213,7 @@ PAR_ID = "maintainableParentID"
|
|
|
200
213
|
PAR_VER = "maintainableParentVersion"
|
|
201
214
|
REL_TO = "relatedTo"
|
|
202
215
|
NO_REL = "NoSpecifiedRelationship"
|
|
216
|
+
METADATA = "Metadata"
|
|
203
217
|
|
|
204
218
|
# To exclude from attached_attributes
|
|
205
219
|
EXCLUDED_ATTRIBUTES = [STR_REF, "action", "dataScope", "xsi:type", SERIES, OBS]
|
|
@@ -238,15 +252,33 @@ DFWS = "Dataflows"
|
|
|
238
252
|
DFWS_LOW = "dataflows"
|
|
239
253
|
RULESETS = "Rulesets"
|
|
240
254
|
RULE_SCHEME = "RulesetScheme"
|
|
255
|
+
RULE_SCHEMES = "RulesetSchemes"
|
|
241
256
|
RULE = "Ruleset"
|
|
242
257
|
UDOS = "UserDefinedOperators"
|
|
243
258
|
UDO_SCHEME = "UserDefinedOperatorScheme"
|
|
259
|
+
UDO_SCHEMES = "UserDefinedOperatorSchemes"
|
|
244
260
|
UDO = "UserDefinedOperator"
|
|
245
261
|
TRANSFORMATIONS = "Transformations"
|
|
246
262
|
TRANS_SCHEME = "TransformationScheme"
|
|
263
|
+
TRANS_SCHEMES = "TransformationSchemes"
|
|
247
264
|
TRANSFORMATION = "Transformation"
|
|
248
265
|
VTLMAPPINGS = "VtlMappings"
|
|
266
|
+
VTLMAPPING_SCHEMES = "VtlMappingSchemes"
|
|
249
267
|
VTLMAPPING = "VtlMapping"
|
|
250
268
|
VTL_DFW_MAPP = "VtlDataflowMapping"
|
|
269
|
+
VTL_CL_MAPP = "VtlCodelistMapping"
|
|
270
|
+
VTL_CON_MAPP = "VtlConceptMapping"
|
|
251
271
|
VTL_MAPPING_SCHEME = "VtlMappingScheme"
|
|
252
272
|
DFW_ALIAS_LOW = "dataflow_alias"
|
|
273
|
+
NAME_PER_SCHEME = "NamePersonalisationScheme"
|
|
274
|
+
NAME_PER_SCHEMES = "NamePersonalisationSchemes"
|
|
275
|
+
NAME_PER = "NamePersonalisation"
|
|
276
|
+
NAME_PERS = "NamePersonalisations"
|
|
277
|
+
CUSTOM_TYPE_SCHEME = "CustomTypeScheme"
|
|
278
|
+
CUSTOM_TYPE_SCHEMES = "CustomTypeSchemes"
|
|
279
|
+
CUSTOM_TYPE = "CustomType"
|
|
280
|
+
CUSTOM_TYPES = "CustomTypes"
|
|
281
|
+
VALUE_LISTS = "ValueLists"
|
|
282
|
+
VALUE_LIST = "ValueList"
|
|
283
|
+
VALUE_LIST_LOW = "valuelist"
|
|
284
|
+
VALUE_ITEM = "ValueItem"
|
|
@@ -8,21 +8,30 @@ from xml.sax.saxutils import escape
|
|
|
8
8
|
|
|
9
9
|
from pysdmx.errors import Invalid, NotImplemented
|
|
10
10
|
from pysdmx.io.format import Format
|
|
11
|
-
from pysdmx.io.xml.
|
|
11
|
+
from pysdmx.io.xml.__tokens import (
|
|
12
12
|
ANNOTATIONS_LOW,
|
|
13
13
|
CONTACTS_LOW,
|
|
14
|
+
CUSTOM_TYPE_SCHEMES,
|
|
15
|
+
CUSTOM_TYPES,
|
|
14
16
|
DESC_LOW,
|
|
15
17
|
DFW,
|
|
16
18
|
DFWS_LOW,
|
|
17
19
|
DSD,
|
|
20
|
+
NAME_PER_SCHEMES,
|
|
21
|
+
NAME_PERS,
|
|
22
|
+
PROV_AGREEMENT,
|
|
18
23
|
PROV_AGREMENT,
|
|
24
|
+
RULE_SCHEMES,
|
|
19
25
|
RULESETS,
|
|
20
26
|
STR_USAGE,
|
|
21
27
|
STRUCTURE,
|
|
28
|
+
TRANS_SCHEMES,
|
|
22
29
|
TRANSFORMATIONS,
|
|
30
|
+
UDO_SCHEMES,
|
|
23
31
|
UDOS,
|
|
24
32
|
URI_LOW,
|
|
25
33
|
URN_LOW,
|
|
34
|
+
VTLMAPPING_SCHEMES,
|
|
26
35
|
VTLMAPPINGS,
|
|
27
36
|
)
|
|
28
37
|
from pysdmx.model import Organisation
|
|
@@ -36,6 +45,8 @@ MESSAGE_TYPE_MAPPING = {
|
|
|
36
45
|
Format.STRUCTURE_SDMX_ML_2_1: "Structure",
|
|
37
46
|
Format.ERROR_SDMX_ML_2_1: "Error",
|
|
38
47
|
Format.REGISTRY_SDMX_ML_2_1: "RegistryInterface",
|
|
48
|
+
Format.DATA_SDMX_ML_3_0: "StructureSpecificData",
|
|
49
|
+
Format.STRUCTURE_SDMX_ML_3_0: "Structure",
|
|
39
50
|
}
|
|
40
51
|
|
|
41
52
|
ABBR_MSG = "mes"
|
|
@@ -47,26 +58,40 @@ ABBR_SPE = "ss"
|
|
|
47
58
|
ANNOTATIONS = "Annotations"
|
|
48
59
|
STRUCTURES = "Structures"
|
|
49
60
|
ORGS = "OrganisationSchemes"
|
|
61
|
+
AGC = "AgencySchemes"
|
|
50
62
|
AGENCIES = "AgencyScheme"
|
|
51
63
|
CODELISTS = "Codelists"
|
|
52
64
|
CONCEPTS = "Concepts"
|
|
65
|
+
CONCEPTS_SCHEMES = "ConceptSchemes"
|
|
53
66
|
DSDS = "DataStructures"
|
|
54
67
|
DATAFLOWS = "Dataflows"
|
|
55
68
|
CONSTRAINTS = "Constraints"
|
|
56
69
|
ALL_DIM = "AllDimensions"
|
|
57
70
|
|
|
58
|
-
|
|
71
|
+
BASE_URL_21 = "http://www.sdmx.org/resources/sdmxml/schemas/v2_1"
|
|
59
72
|
|
|
60
|
-
|
|
73
|
+
NAMESPACES_21 = {
|
|
61
74
|
"xsi": "http://www.w3.org/2001/XMLSchema-instance",
|
|
62
|
-
ABBR_MSG: f"{
|
|
63
|
-
ABBR_GEN: f"{
|
|
64
|
-
ABBR_COM: f"{
|
|
65
|
-
ABBR_STR: f"{
|
|
66
|
-
ABBR_SPE: f"{
|
|
75
|
+
ABBR_MSG: f"{BASE_URL_21}/message",
|
|
76
|
+
ABBR_GEN: f"{BASE_URL_21}/data/generic",
|
|
77
|
+
ABBR_COM: f"{BASE_URL_21}/common",
|
|
78
|
+
ABBR_STR: f"{BASE_URL_21}/structure",
|
|
79
|
+
ABBR_SPE: f"{BASE_URL_21}/data/structurespecific",
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
BASE_URL_30 = "http://www.sdmx.org/resources/sdmxml/schemas/v3_0"
|
|
83
|
+
|
|
84
|
+
NAMESPACES_30 = {
|
|
85
|
+
"xsi": "http://www.w3.org/2001/XMLSchema-instance",
|
|
86
|
+
ABBR_MSG: f"{BASE_URL_30}/message",
|
|
87
|
+
ABBR_COM: f"{BASE_URL_30}/common",
|
|
88
|
+
ABBR_STR: f"{BASE_URL_30}/structure",
|
|
89
|
+
ABBR_SPE: f"{BASE_URL_30}/data/structurespecific",
|
|
67
90
|
}
|
|
68
91
|
|
|
69
92
|
URN_DS_BASE = "urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure="
|
|
93
|
+
URN_PROVISION = "urn:sdmx:org.sdmx.infomodel.registry.ProvisionAgreement="
|
|
94
|
+
URN_DFW = "urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow="
|
|
70
95
|
|
|
71
96
|
|
|
72
97
|
def __namespaces_from_type(type_: Format) -> str:
|
|
@@ -82,11 +107,15 @@ def __namespaces_from_type(type_: Format) -> str:
|
|
|
82
107
|
NotImplemented: If the MessageType is not implemented
|
|
83
108
|
"""
|
|
84
109
|
if type_ == Format.STRUCTURE_SDMX_ML_2_1:
|
|
85
|
-
return f"xmlns:{ABBR_STR}={
|
|
110
|
+
return f"xmlns:{ABBR_STR}={NAMESPACES_21[ABBR_STR]!r} "
|
|
86
111
|
elif type_ == Format.DATA_SDMX_ML_2_1_STR:
|
|
87
|
-
return f"xmlns:{ABBR_SPE}={
|
|
112
|
+
return f"xmlns:{ABBR_SPE}={NAMESPACES_21[ABBR_SPE]!r} "
|
|
88
113
|
elif type_ == Format.DATA_SDMX_ML_2_1_GEN:
|
|
89
|
-
return f"xmlns:{ABBR_GEN}={
|
|
114
|
+
return f"xmlns:{ABBR_GEN}={NAMESPACES_21[ABBR_GEN]!r} "
|
|
115
|
+
elif type_ == Format.DATA_SDMX_ML_3_0:
|
|
116
|
+
return f"xmlns:{ABBR_SPE}={NAMESPACES_30[ABBR_SPE]!r} "
|
|
117
|
+
elif type_ == Format.STRUCTURE_SDMX_ML_3_0:
|
|
118
|
+
return f"xmlns:{ABBR_STR}={NAMESPACES_30[ABBR_STR]!r} "
|
|
90
119
|
else:
|
|
91
120
|
raise NotImplemented(f"{type_} not implemented")
|
|
92
121
|
|
|
@@ -109,20 +138,34 @@ def create_namespaces(
|
|
|
109
138
|
outfile = f'<?xml version="1.0" encoding="UTF-8"?>{nl}'
|
|
110
139
|
|
|
111
140
|
outfile += f"<{ABBR_MSG}:{MESSAGE_TYPE_MAPPING[type_]} "
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
f"xmlns:
|
|
117
|
-
f"{
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
141
|
+
if (
|
|
142
|
+
type_ == Format.DATA_SDMX_ML_3_0
|
|
143
|
+
or type_ == Format.STRUCTURE_SDMX_ML_3_0
|
|
144
|
+
):
|
|
145
|
+
outfile += f"xmlns:xsi={NAMESPACES_30['xsi']!r} "
|
|
146
|
+
outfile += f"xmlns:{ABBR_MSG}={NAMESPACES_30[ABBR_MSG]!r} "
|
|
147
|
+
outfile += __namespaces_from_type(type_)
|
|
148
|
+
outfile += (
|
|
149
|
+
f"xmlns:{ABBR_COM}={NAMESPACES_30[ABBR_COM]!r} "
|
|
150
|
+
f"{ss_namespaces}"
|
|
151
|
+
f'xsi:schemaLocation="{NAMESPACES_30[ABBR_MSG]} '
|
|
152
|
+
f'https://registry.sdmx.org/schemas/v3_0/SDMXMessage.xsd">'
|
|
153
|
+
)
|
|
154
|
+
else:
|
|
155
|
+
outfile += f"xmlns:xsi={NAMESPACES_21['xsi']!r} "
|
|
156
|
+
outfile += f"xmlns:{ABBR_MSG}={NAMESPACES_21[ABBR_MSG]!r} "
|
|
157
|
+
outfile += __namespaces_from_type(type_)
|
|
158
|
+
outfile += (
|
|
159
|
+
f"xmlns:{ABBR_COM}={NAMESPACES_21[ABBR_COM]!r} "
|
|
160
|
+
f"{ss_namespaces}"
|
|
161
|
+
f'xsi:schemaLocation="{NAMESPACES_21[ABBR_MSG]} '
|
|
162
|
+
f'https://registry.sdmx.org/schemas/v2_1/SDMXMessage.xsd">'
|
|
163
|
+
)
|
|
121
164
|
|
|
122
165
|
return outfile.replace("'", '"')
|
|
123
166
|
|
|
124
167
|
|
|
125
|
-
|
|
168
|
+
MSG_CONTENT_PKG_21 = OrderedDict(
|
|
126
169
|
[
|
|
127
170
|
(ORGS, "OrganisationSchemes"),
|
|
128
171
|
(DATAFLOWS, "Dataflows"),
|
|
@@ -130,10 +173,30 @@ MSG_CONTENT_PKG = OrderedDict(
|
|
|
130
173
|
(CONCEPTS, "Concepts"),
|
|
131
174
|
(DSDS, "DataStructures"),
|
|
132
175
|
(CONSTRAINTS, "ContentConstraints"),
|
|
176
|
+
(CUSTOM_TYPES, "CustomTypes"),
|
|
177
|
+
(VTLMAPPINGS, "VtlMappings"),
|
|
178
|
+
(NAME_PERS, "NamePersonalisations"),
|
|
133
179
|
(RULESETS, "Rulesets"),
|
|
134
180
|
(TRANSFORMATIONS, "Transformations"),
|
|
135
181
|
(UDOS, "UserDefinedOperators"),
|
|
136
|
-
|
|
182
|
+
]
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
MSG_CONTENT_PKG_30 = OrderedDict(
|
|
187
|
+
[
|
|
188
|
+
(AGC, "AgencySchemes"),
|
|
189
|
+
(DATAFLOWS, "Dataflows"),
|
|
190
|
+
(CODELISTS, "Codelists"),
|
|
191
|
+
(CONCEPTS_SCHEMES, "ConceptSchemes"),
|
|
192
|
+
(DSDS, "DataStructures"),
|
|
193
|
+
(CONSTRAINTS, "ContentConstraints"),
|
|
194
|
+
(CUSTOM_TYPE_SCHEMES, "CustomTypeSchemes"),
|
|
195
|
+
(VTLMAPPING_SCHEMES, "VtlMappingSchemes"),
|
|
196
|
+
(NAME_PER_SCHEMES, "NamePersonalisationSchemes"),
|
|
197
|
+
(RULE_SCHEMES, "RulesetSchemes"),
|
|
198
|
+
(TRANS_SCHEMES, "TransformationSchemes"),
|
|
199
|
+
(UDO_SCHEMES, "UserDefinedOperatorSchemes"),
|
|
137
200
|
]
|
|
138
201
|
)
|
|
139
202
|
|
|
@@ -236,6 +299,7 @@ def __reference(
|
|
|
236
299
|
nl: str,
|
|
237
300
|
prettyprint: bool,
|
|
238
301
|
add_namespace_structure: bool,
|
|
302
|
+
references_30: bool = False,
|
|
239
303
|
) -> str:
|
|
240
304
|
child2 = "\t\t" if prettyprint else ""
|
|
241
305
|
child3 = "\t\t\t" if prettyprint else ""
|
|
@@ -244,16 +308,49 @@ def __reference(
|
|
|
244
308
|
reference = parse_short_urn(urn_structure)
|
|
245
309
|
if reference.sdmx_type == DSD:
|
|
246
310
|
structure_type = STRUCTURE
|
|
311
|
+
urn_type = URN_DS_BASE
|
|
247
312
|
elif reference.sdmx_type == DFW:
|
|
248
313
|
structure_type = STR_USAGE
|
|
314
|
+
urn_type = URN_DFW
|
|
249
315
|
else:
|
|
250
|
-
structure_type = PROV_AGREMENT
|
|
316
|
+
structure_type = PROV_AGREEMENT if references_30 else PROV_AGREMENT
|
|
317
|
+
urn_type = URN_PROVISION
|
|
251
318
|
if add_namespace_structure:
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
319
|
+
if references_30:
|
|
320
|
+
namespace = (
|
|
321
|
+
f"{urn_type}{reference.agency}:{reference.id}"
|
|
322
|
+
f"({reference.version})"
|
|
323
|
+
)
|
|
324
|
+
else:
|
|
325
|
+
namespace = (
|
|
326
|
+
f"{URN_DS_BASE}{reference.agency}:{reference.id}"
|
|
327
|
+
f"({reference.version})"
|
|
328
|
+
)
|
|
329
|
+
|
|
256
330
|
namespace = f"namespace={namespace!r} "
|
|
331
|
+
if references_30:
|
|
332
|
+
reference_str = (
|
|
333
|
+
f"{urn_type}{reference.agency}:{reference.id}({reference.version})"
|
|
334
|
+
)
|
|
335
|
+
else:
|
|
336
|
+
# Then the reference
|
|
337
|
+
reference_str = (
|
|
338
|
+
f"{nl}{child4}<Ref agencyID={reference.agency!r} "
|
|
339
|
+
f"id={reference.id!r} version={reference.version!r} "
|
|
340
|
+
f"class={reference.sdmx_type!r}/>"
|
|
341
|
+
)
|
|
342
|
+
if references_30:
|
|
343
|
+
common_structure = (
|
|
344
|
+
f"{nl}{child3}<{ABBR_COM}:{structure_type}>"
|
|
345
|
+
f"{reference_str}"
|
|
346
|
+
f"</{ABBR_COM}:{structure_type}>"
|
|
347
|
+
)
|
|
348
|
+
else:
|
|
349
|
+
common_structure = (
|
|
350
|
+
f"{nl}{child3}<{ABBR_COM}:{structure_type}>"
|
|
351
|
+
f"{reference_str}"
|
|
352
|
+
f"{nl}{child3}</{ABBR_COM}:{structure_type}>"
|
|
353
|
+
)
|
|
257
354
|
|
|
258
355
|
return (
|
|
259
356
|
# First the message structure
|
|
@@ -262,13 +359,7 @@ def __reference(
|
|
|
262
359
|
f"{namespace}"
|
|
263
360
|
f"dimensionAtObservation={dimension!r}>"
|
|
264
361
|
# Then the common structure
|
|
265
|
-
f"{
|
|
266
|
-
# Then the reference
|
|
267
|
-
f"{nl}{child4}<Ref agencyID={reference.agency!r} "
|
|
268
|
-
f"id={reference.id!r} version={reference.version!r} "
|
|
269
|
-
f"class={reference.sdmx_type!r}/>"
|
|
270
|
-
# Close the common structure
|
|
271
|
-
f"{nl}{child3}</{ABBR_COM}:{structure_type}>"
|
|
362
|
+
f"{common_structure}"
|
|
272
363
|
# Close the message structure
|
|
273
364
|
f"{nl}{child2}</{ABBR_MSG}:Structure>"
|
|
274
365
|
)
|
|
@@ -279,6 +370,7 @@ def __write_header(
|
|
|
279
370
|
prettyprint: bool,
|
|
280
371
|
add_namespace_structure: bool = False,
|
|
281
372
|
data_message: bool = True,
|
|
373
|
+
references_30: bool = False,
|
|
282
374
|
) -> str:
|
|
283
375
|
"""Writes the Header part of the message.
|
|
284
376
|
|
|
@@ -287,6 +379,7 @@ def __write_header(
|
|
|
287
379
|
prettyprint: Prettyprint or not
|
|
288
380
|
add_namespace_structure: Add the namespace for the structure
|
|
289
381
|
data_message: If the message is a data message
|
|
382
|
+
references_30: If the references are for SDMX 3.0
|
|
290
383
|
|
|
291
384
|
Returns:
|
|
292
385
|
The XML string
|
|
@@ -308,9 +401,7 @@ def __write_header(
|
|
|
308
401
|
return ""
|
|
309
402
|
child2 = "\t\t" if prettyprint else ""
|
|
310
403
|
return (
|
|
311
|
-
f"{nl}{child2}<{ABBR_MSG}:{element}>"
|
|
312
|
-
f"{value}"
|
|
313
|
-
f"</{ABBR_MSG}:{element}>"
|
|
404
|
+
f"{nl}{child2}<{ABBR_MSG}:{element}>{value}</{ABBR_MSG}:{element}>"
|
|
314
405
|
)
|
|
315
406
|
|
|
316
407
|
nl = "\n" if prettyprint else ""
|
|
@@ -331,6 +422,7 @@ def __write_header(
|
|
|
331
422
|
nl,
|
|
332
423
|
prettyprint,
|
|
333
424
|
add_namespace_structure,
|
|
425
|
+
references_30,
|
|
334
426
|
)
|
|
335
427
|
if not data_message and (
|
|
336
428
|
header.dataset_id or header.dataset_action or header.structure
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# mypy: disable-error-code="union-attr"
|
|
2
|
+
"""Module for writing SDMX-ML 3.0 Structure Specific auxiliary functions."""
|
|
3
|
+
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from pysdmx.io.pd import PandasDataset
|
|
9
|
+
from pysdmx.io.xml.__write_aux import (
|
|
10
|
+
ABBR_MSG,
|
|
11
|
+
ALL_DIM,
|
|
12
|
+
__escape_xml,
|
|
13
|
+
get_structure,
|
|
14
|
+
)
|
|
15
|
+
from pysdmx.io.xml.__write_data_aux import (
|
|
16
|
+
get_codes,
|
|
17
|
+
writing_validation,
|
|
18
|
+
)
|
|
19
|
+
from pysdmx.io.xml.config import CHUNKSIZE
|
|
20
|
+
from pysdmx.util import parse_short_urn
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def __memory_optimization_writing(
    dataset: PandasDataset, prettyprint: bool
) -> str:
    """Render every row of ``dataset.data`` as ``Obs`` elements, in chunks.

    Frames with more than ``CHUNKSIZE`` rows are processed one window of
    ``CHUNKSIZE`` rows at a time, so only one slice is converted to
    records at once; smaller frames are processed in a single call.

    Args:
        dataset: Dataset whose ``data`` frame is written.
        prettyprint: Passed through to ``__obs_processing``.

    Returns:
        The concatenated output of ``__obs_processing`` for all rows,
        in row order.
    """
    frame = dataset.data
    total_rows = len(frame)

    # Small frames do not need windowing.
    if total_rows <= CHUNKSIZE:
        return __obs_processing(frame, prettyprint)

    # Stepping by CHUNKSIZE covers each row exactly once; the last window
    # is simply shorter, so no separate "tail" pass (or empty extra pass
    # on exact multiples of CHUNKSIZE) is required.
    return "".join(
        __obs_processing(
            frame.iloc[start : start + CHUNKSIZE], prettyprint
        )
        for start in range(0, total_rows, CHUNKSIZE)
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def __write_data_structure_specific(
    datasets: Dict[str, PandasDataset],
    dim_mapping: Dict[str, str],
    prettyprint: bool = True,
    references_30: bool = False,
) -> str:
    """Serialise several datasets to SDMX-ML Structure-Specific format.

    Each dataset's ``data`` frame is replaced in place by an all-string
    copy in which the cell values ``"nan"`` and ``"<NA>"`` are blanked,
    before the dataset is handed to ``__write_data_single_dataset``.

    Args:
        datasets: dict. Datasets to be written, keyed by short URN.
        dim_mapping: dict. URN-DimensionAtObservation mapping; must hold
            an entry for every key of ``datasets``.
        prettyprint: bool. Prettyprint or not.
        references_30: bool. Whether to use SDMX 3.0 references.

    Returns:
        The data in SDMX-ML Structure-Specific format, as string.
    """
    rendered: List[str] = []

    # The namespace counter handed to the single-dataset writer is 1-based.
    for position, (urn, current) in enumerate(datasets.items(), start=1):
        as_text = current.data.astype(str)
        current.data = as_text.replace({"nan": "", "<NA>": ""})
        rendered.append(
            __write_data_single_dataset(
                dataset=current,
                prettyprint=prettyprint,
                count=position,
                dim=dim_mapping[urn],
                references_30=references_30,
            )
        )

    return "".join(rendered)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def __write_data_single_dataset(
    dataset: PandasDataset,
    prettyprint: bool = True,
    count: int = 1,
    dim: str = ALL_DIM,
    references_30: bool = False,
) -> str:
    """Write one dataset as an SDMX-ML Structure-Specific ``DataSet``.

    Args:
        dataset: PandasDataset. Dataset to be written. Its ``data`` frame
            is replaced in place by an all-string copy with missing
            values blanked.
        prettyprint: bool. Emit newlines and tabs when True.
        count: int. Index used for the ``ns{count}`` prefix of the
            ``xsi:type`` attribute.
        dim: str. Dimension at observation. ``ALL_DIM`` writes a flat
            list of ``Obs`` elements; any other value groups the
            observations into ``Series`` elements.
        references_30: bool. Whether to use SDMX 3.0 references. When
            True the ``ss:dataScope`` attribute is not written.

    Returns:
        The data in SDMX-ML Structure-Specific format, as string.
    """

    def __remove_optional_attributes_empty_data(str_to_check: str) -> str:
        """Drop empty-valued occurrences of non-required attributes."""
        for att in dataset.structure.components.attributes:
            if not att.required:
                # Both quote styles can occur before the final
                # single-to-double quote normalisation below.
                str_to_check = str_to_check.replace(f"{att.id}='' ", "")
                str_to_check = str_to_check.replace(f'{att.id}="" ', "")
        return str_to_check

    # Parse the structure URN once; both the id and the kind of structure
    # are needed for the DataSet attributes.
    reference = parse_short_urn(get_structure(dataset))
    id_structure = reference.id
    # dataScope names the *kind* of structure referenced, so it must come
    # from sdmx_type; previously the structure id was written here.
    sdmx_type = reference.sdmx_type

    # Remove nan values from DataFrame
    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")

    nl = "\n" if prettyprint else ""
    child1 = "\t" if prettyprint else ""

    attached_attributes_str = "".join(
        f"{k}={str(v)!r} " for k, v in dataset.attributes.items()
    )
    datascope = "" if references_30 else f'ss:dataScope="{sdmx_type}" '

    # Opening DataSet tag
    outfile = (
        f"{nl}{child1}<{ABBR_MSG}:DataSet {attached_attributes_str}"
        f"ss:structureRef={id_structure!r} "
        f'xsi:type="ns{count}:DataSetType" '
        f"{datascope}"
        f'action="{dataset.action.value}">{nl}'
    )

    if dim == ALL_DIM:
        data = __memory_optimization_writing(dataset, prettyprint)
    else:
        writing_validation(dataset)
        series_codes, obs_codes = get_codes(
            dimension_code=dim,
            structure=dataset.structure,  # type: ignore[arg-type]
            data=dataset.data,
        )
        data = __series_processing(
            data=dataset.data,
            series_codes=series_codes,
            obs_codes=obs_codes,
            prettyprint=prettyprint,
        )

    # Remove optional attributes empty data
    outfile += __remove_optional_attributes_empty_data(data)

    outfile += f"{child1}</{ABBR_MSG}:DataSet>"

    # Attribute values were written with repr(); normalise to double
    # quotes for the final XML.
    return outfile.replace("'", '"')
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def __obs_processing(data: pd.DataFrame, prettyprint: bool = True) -> str:
    """Render each row of ``data`` as a self-closing ``Obs`` element.

    Every column becomes one ``name='value'`` attribute; values are
    converted with ``str`` and passed through ``__escape_xml``.

    Args:
        data: Frame whose rows are the observations to write.
        prettyprint: When True, indent each element with two tabs and
            end it with a newline.

    Returns:
        The concatenated ``Obs`` elements, in row order.
    """
    newline = "\n" if prettyprint else ""
    indent = "\t\t" if prettyprint else ""

    def _render(record: Dict[str, Any]) -> str:
        """Build the ``Obs`` element for one row."""
        attributes = "".join(
            f"{name}={__escape_xml(str(value))!r} "
            for name, value in record.items()
        )
        return f"{indent}<Obs {attributes}/>{newline}"

    return "".join(
        _render(record) for record in data.to_dict(orient="records")
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def __series_processing(
    data: pd.DataFrame,
    series_codes: List[str],
    obs_codes: List[str],
    prettyprint: bool = True,
) -> str:
    """Render ``data`` as ``Series`` elements with nested ``Obs`` elements.

    Rows are sorted by ``series_codes`` and grouped on them; each group
    yields one ``Series`` element whose attributes are the series key and
    whose children are the group's ``obs_codes`` columns, one ``Obs`` per
    row. Values are converted with ``str`` and passed through
    ``__escape_xml``.

    Args:
        data: Frame holding both the series and observation columns.
        series_codes: Columns written as ``Series`` attributes.
        obs_codes: Columns written as ``Obs`` attributes.
        prettyprint: When True, emit tabs and newlines.

    Returns:
        The concatenated ``Series`` elements.
    """
    newline = "\n" if prettyprint else ""
    series_pad = "\t\t" if prettyprint else ""
    obs_pad = "\t\t\t" if prettyprint else ""

    def _attributes(pairs: Dict[Any, Any]) -> str:
        """Format a mapping as ``key='value' `` attribute text."""
        return "".join(
            f"{key}={__escape_xml(str(value))!r} "
            for key, value in pairs.items()
        )

    def _render_series(series_info: Dict[Any, Any]) -> str:
        """Build one ``Series`` element from its key and ``Obs`` records."""
        key_only = {k: v for k, v in series_info.items() if k != "Obs"}
        chunks = [f"{series_pad}<Series ", _attributes(key_only), f">{newline}"]
        chunks.extend(
            f"{obs_pad}<Obs {_attributes(record)}/>{newline}"
            for record in series_info["Obs"]
        )
        chunks.append(f"{series_pad}</Series>{newline}")
        return "".join(chunks)

    def _collect(group: Any) -> None:
        """Pair the next pending series key with this group's rows."""
        records = group.to_dict(orient="records")
        # Groups are consumed in order, so the head of the queue is
        # taken to be the key of the current group.
        # NOTE(review): relies on groupby visiting groups in the same
        # order as the sorted, de-duplicated keys - confirm.
        series_info = pending_series.pop(0)
        series_info["Obs"] = records
        rendered.append(_render_series(series_info))

    # Getting each datapoint from data and creating the series key queue
    ordered = data.sort_values(series_codes, axis=0)
    unique_keys = ordered[series_codes].drop_duplicates()
    pending_series = unique_keys.reset_index(drop=True).to_dict(
        orient="records"
    )

    rendered: List[str] = []
    ordered.groupby(by=series_codes)[obs_codes].apply(_collect)

    return "".join(rendered)
|
|
@@ -3,11 +3,14 @@
|
|
|
3
3
|
from io import BytesIO
|
|
4
4
|
|
|
5
5
|
from lxml import etree
|
|
6
|
-
from sdmxschemas import SDMX_ML_21_MESSAGE_PATH as
|
|
6
|
+
from sdmxschemas import SDMX_ML_21_MESSAGE_PATH as SCHEMA_PATH_21
|
|
7
|
+
from sdmxschemas import SDMX_ML_30_MESSAGE_PATH as SCHEMA_PATH_30
|
|
7
8
|
|
|
8
9
|
from pysdmx.errors import Invalid
|
|
9
10
|
from pysdmx.io.xml.__allowed_lxml_errors import ALLOWED_ERRORS_CONTENT
|
|
10
11
|
|
|
12
|
+
SCHEMA_ROOT_30 = "http://www.sdmx.org/resources/sdmxml/schemas/v3_0/"
|
|
13
|
+
|
|
11
14
|
|
|
12
15
|
def validate_doc(input_str: str) -> None:
|
|
13
16
|
"""Validates the SDMX-ML data against the XSD schema for SDMX-ML 2.1.
|
|
@@ -19,7 +22,12 @@ def validate_doc(input_str: str) -> None:
|
|
|
19
22
|
Invalid: If the SDMX-ML data does not validate against the schema.
|
|
20
23
|
"""
|
|
21
24
|
parser = etree.ETCompatXMLParser()
|
|
22
|
-
|
|
25
|
+
check = input_str[:1000].lower()
|
|
26
|
+
if SCHEMA_ROOT_30 in check:
|
|
27
|
+
xmlschema_doc = etree.parse(SCHEMA_PATH_30)
|
|
28
|
+
else:
|
|
29
|
+
xmlschema_doc = etree.parse(SCHEMA_PATH_21)
|
|
30
|
+
|
|
23
31
|
xmlschema = etree.XMLSchema(xmlschema_doc)
|
|
24
32
|
|
|
25
33
|
bytes_infile = BytesIO(bytes(input_str, "UTF_8"))
|