pysdmx 1.10.0rc2__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pysdmx/__init__.py CHANGED
@@ -1,3 +1,3 @@
 """Your opinionated Python SDMX library."""
 
-__version__ = "1.10.0rc2"
+__version__ = "1.10.1"
pysdmx/api/fmr/__init__.py CHANGED
@@ -39,6 +39,7 @@ from pysdmx.model import (
     Dataflow,
     DataflowInfo,
     DataProvider,
+    DataStructureDefinition,
     Hierarchy,
     HierarchyAssociation,
     Metadataflow,
@@ -679,7 +680,7 @@ class RegistryClient(__BaseRegistryClient):
         agency: str = "*",
         id: str = "*",
         version: str = "+",
-    ) -> Sequence[Dataflow]:
+    ) -> Sequence[DataStructureDefinition]:
         """Get the data structures(s) matching the supplied parameters.
 
         Args:
@@ -1253,7 +1254,7 @@ class AsyncRegistryClient(__BaseRegistryClient):
         agency: str = "*",
         id: str = "*",
         version: str = "+",
-    ) -> Sequence[Dataflow]:
+    ) -> Sequence[DataStructureDefinition]:
         """Get the data structures(s) matching the supplied parameters.
 
         Args:
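For downstream code, the visible effect of this fix is in type checking only: the two methods already returned data structure definitions but were annotated as returning dataflows. A minimal sketch of the corrected contract, assuming a hypothetical endpoint and assuming the method is named `get_data_structures` (the name is not shown in this diff):

```python
from typing import Sequence

from pysdmx.api.fmr import RegistryClient
from pysdmx.model import DataStructureDefinition

# Hypothetical FMR endpoint, for illustration only.
client = RegistryClient("https://registry.example.org/sdmx/v2/")

# Type checkers now see the actual return type; before the fix
# this was reported as Sequence[Dataflow].
dsds: Sequence[DataStructureDefinition] = client.get_data_structures(
    agency="BIS"
)
```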
pysdmx/api/qb/data.py CHANGED
@@ -329,6 +329,8 @@ class DataQuery(_CoreDataQuery, frozen=True, omit_defaults=True):
 
     def __get_short_v2_qs(self, api_version: ApiVersion) -> str:
         qs = ""
+        if self.components:
+            qs += self._create_component_filters(self.components)
         if self.updated_after:
             qs = super()._append_qs_param(
                 qs,
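For context, SDMX-REST 2.x carries component filters as `c[COMPONENT_ID]` query parameters; the fix above makes the short form of the query string include them instead of silently dropping them. A purely illustrative sketch of that serialization (the real logic lives in the private `_create_component_filters` helper, whose signature is not shown in this diff):

```python
from typing import Mapping


def component_filters(components: Mapping[str, str]) -> str:
    # Illustration only: render filters as SDMX-REST 2.x c[...] parameters.
    return "&".join(f"c[{cid}]={value}" for cid, value in components.items())


print(component_filters({"FREQ": "A", "REF_AREA": "CH"}))
# c[FREQ]=A&c[REF_AREA]=CH
```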
pysdmx/io/csv/__csv_aux_writer.py CHANGED
@@ -3,8 +3,6 @@ from typing import List, Literal, Optional, Sequence
 
 import pandas as pd
 
-from pysdmx.errors import Invalid
-from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.model import Schema
 from pysdmx.model.dataset import ActionType
@@ -18,25 +16,6 @@ SDMX_CSV_ACTION_MAPPER = {
 }
 
 
-def _validate_schema_exists(dataset: PandasDataset) -> Schema:
-    """Validates that the dataset has a Schema defined.
-
-    Args:
-        dataset: The dataset to validate.
-
-    Returns:
-        The `Schema` from the dataset.
-
-    Raises:
-        Invalid: If the structure is not a `Schema`.
-    """
-    if not isinstance(dataset.structure, Schema):
-        raise Invalid(
-            "Dataset Structure is not a Schema. Cannot perform operation."
-        )
-    return dataset.structure
-
-
 def __write_time_period(df: pd.DataFrame, time_format: str) -> None:
     # TODO: Correct handle of normalized time format
     raise NotImplementedError("Normalized time format is not implemented yet.")
@@ -91,10 +70,8 @@ def _write_csv_2_aux(
 ) -> List[pd.DataFrame]:
     dataframes = []
     for dataset in datasets:
-        schema = _validate_schema_exists(dataset)
         # Create a copy of the dataset
         df: pd.DataFrame = copy(dataset.data)
-        df = _fill_na_values(df, schema)
         structure_ref, unique_id = dataset.short_urn.split("=", maxsplit=1)
 
         # Add additional attributes to the dataset
pysdmx/io/csv/sdmx10/reader/__init__.py CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
    df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[]
+        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop empty columns
    df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/csv/sdmx10/writer/__init__.py CHANGED
@@ -6,12 +6,9 @@ from typing import Literal, Optional, Sequence, Union
 
 import pandas as pd
 
-from pysdmx.io._pd_utils import _fill_na_values
-from pysdmx.io.csv.__csv_aux_writer import (
-    __write_time_period,
-    _validate_schema_exists,
-)
+from pysdmx.io.csv.__csv_aux_writer import __write_time_period
 from pysdmx.io.pd import PandasDataset
+from pysdmx.model import Schema
 from pysdmx.toolkit.pd._data_utils import format_labels
 
 
@@ -47,26 +44,22 @@ def write(
     # Create a copy of the dataset
     dataframes = []
     for dataset in datasets:
-        # Validate that dataset has a proper Schema
-        schema = _validate_schema_exists(dataset)
-
         df: pd.DataFrame = copy(dataset.data)
 
-        # Fill missing values
-        df = _fill_na_values(df, schema)
-
         # Add additional attributes to the dataset
         for k, v in dataset.attributes.items():
             df[k] = v
         structure_id = dataset.short_urn.split("=")[1]
         if time_format is not None and time_format != "original":
             __write_time_period(df, time_format)
-        if labels is not None:
-            format_labels(df, labels, schema.components)
+        if labels is not None and isinstance(dataset.structure, Schema):
+            format_labels(df, labels, dataset.structure.components)
             if labels == "id":
                 df.insert(0, "DATAFLOW", structure_id)
             else:
-                df.insert(0, "DATAFLOW", f"{structure_id}:{schema.name}")
+                df.insert(
+                    0, "DATAFLOW", f"{structure_id}:{dataset.structure.name}"
+                )
         else:
             df.insert(0, "DATAFLOW", structure_id)
 
@@ -75,7 +68,8 @@ def write(
     # Concatenate the dataframes
     all_data = pd.concat(dataframes, ignore_index=True, axis=0)
 
-    all_data = all_data.astype(str)
+    # Ensure null values are represented as empty strings
+    all_data = all_data.astype(str).replace({"nan": "", "<NA>": ""})
     # If the output path is an empty string we use None
     output_path = (
         None
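Casting a frame with missing values to `str` stringifies the NA markers themselves: float `NaN` becomes the literal text `nan`, and `pd.NA` from nullable dtypes becomes `<NA>`. The appended `.replace(...)` maps both back to empty strings so they serialize as empty SDMX-CSV cells. A quick demonstration of the pattern:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "OBS_VALUE": [1.5, None],                     # float column -> NaN
        "COUNT": pd.array([3, None], dtype="Int64"),  # nullable -> pd.NA
    }
)

as_str = df.astype(str)
print(as_str.iloc[1].tolist())  # ['nan', '<NA>']

clean = as_str.replace({"nan": "", "<NA>": ""})
print(clean.iloc[1].tolist())   # ['', '']
```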
pysdmx/io/csv/sdmx20/reader/__init__.py CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
    df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[]
+        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop empty columns
    df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/csv/sdmx20/writer/__init__.py CHANGED
@@ -60,7 +60,7 @@ def write(
 
     all_data = pd.concat(dataframes, ignore_index=True, axis=0)
 
-    all_data = all_data.astype(str)
+    all_data = all_data.astype(str).replace({"nan": "", "<NA>": ""})
 
     # If the output path is an empty string we use None
     output_path = (
pysdmx/io/csv/sdmx21/reader/__init__.py CHANGED
@@ -24,7 +24,7 @@ def read(input_str: str) -> Sequence[PandasDataset]:
     """
     # Get Dataframe from CSV file
    df_csv = pd.read_csv(
-        StringIO(input_str), keep_default_na=False, na_values=[]
+        StringIO(input_str), keep_default_na=False, na_values=[""]
    )
    # Drop empty columns
    df_csv = df_csv.dropna(axis=1, how="all")
pysdmx/io/csv/sdmx21/writer/__init__.py CHANGED
@@ -57,7 +57,7 @@ def write(
 
     all_data = pd.concat(dataframes, ignore_index=True, axis=0)
 
-    all_data = all_data.astype(str)
+    all_data = all_data.astype(str).replace({"nan": "", "<NA>": ""})
 
     # If the output path is an empty string we use None
     output_path = (
pysdmx/io/json/fusion/messages/dsd.py CHANGED
@@ -80,8 +80,14 @@ class FusionAttribute(Struct, frozen=True):
     measureReferences: Optional[Sequence[str]] = None
 
     def __derive_level(self, groups: Sequence[FusionGroup]) -> str:
-        if self.attachmentLevel == "OBSERVATION":
-            return "O"
+        if self.measureReferences:
+            if (
+                len(self.measureReferences) == 1
+                and self.measureReferences[0] == "OBS_VALUE"
+            ):
+                return "O"
+            else:
+                return ",".join(self.measureReferences)
         elif self.attachmentLevel == "DATA_SET":
             return "D"
         elif self.attachmentLevel == "GROUP":
pysdmx/io/json/sdmxjson2/messages/dsd.py CHANGED
@@ -126,7 +126,10 @@ class JsonAttributeRelationship(Struct, frozen=True, omit_defaults=True):
     ) -> str:
         """Returns the attachment level."""
         if measures:
-            return "O"
+            if len(measures) == 1 and measures[0] == "OBS_VALUE":
+                return "O"
+            else:
+                return ",".join(measures)
         elif self.dimensions:
             return ",".join(self.dimensions)
         elif self.group:
@@ -136,15 +139,17 @@ class JsonAttributeRelationship(Struct, frozen=True, omit_defaults=True):
         return "D"
 
     @classmethod
-    def from_model(self, rel: str) -> "JsonAttributeRelationship":
+    def from_model(
+        self, rel: str, has_measure_rel: bool = False
+    ) -> "JsonAttributeRelationship":
         """Converts a pysdmx attribute relationship to an SDMX-JSON one."""
         if rel == "D":
             return JsonAttributeRelationship(dataflow={})
-        elif rel == "O":
+        elif rel == "O" or has_measure_rel:
             return JsonAttributeRelationship(observation={})
         else:
-            dims = rel.split(",")
-            return JsonAttributeRelationship(dimensions=dims)
+            comps = rel.split(",")
+            return JsonAttributeRelationship(dimensions=comps)
 
 
 class JsonDimension(Struct, frozen=True, omit_defaults=True):
@@ -257,17 +262,29 @@ class JsonAttribute(Struct, frozen=True, omit_defaults=True):
         )
 
     @classmethod
-    def from_model(self, attribute: Component) -> "JsonAttribute":
+    def from_model(
+        self, attribute: Component, measures: Sequence[Component]
+    ) -> "JsonAttribute":
         """Converts a pysdmx attribute to an SDMX-JSON one."""
         concept = _get_concept_reference(attribute)
         usage = "mandatory" if attribute.required else "optional"
+        repr = _get_json_representation(attribute)
+
+        ids = attribute.attachment_level.split(",")  # type: ignore[union-attr]
+        comps = set(ids)
+        mids = {m.id for m in measures}
+        has_measure_rel = len(comps.intersection(mids)) > 0
         level = JsonAttributeRelationship.from_model(
-            attribute.attachment_level  # type: ignore[arg-type]
+            attribute.attachment_level,  # type: ignore[arg-type]
+            has_measure_rel,
         )
-        repr = _get_json_representation(attribute)
-        # The line below will need to be changed when we work on
-        # Measure Relationship (cf. issue #467)
-        mr = ["OBS_VALUE"] if attribute.attachment_level == "O" else None
+
+        if attribute.attachment_level == "O":
+            mr = ["OBS_VALUE"]
+        elif has_measure_rel:
+            mr = ids
+        else:
+            mr = None
 
         return JsonAttribute(
             id=attribute.id,
@@ -356,12 +373,14 @@ class JsonAttributes(Struct, frozen=True, omit_defaults=True):
 
     @classmethod
     def from_model(
-        self, attributes: Sequence[Component]
+        self, attributes: Sequence[Component], measures: Sequence[Component]
    ) -> Optional["JsonAttributes"]:
         """Converts a pysdmx list of attributes to an SDMX-JSON one."""
         if len(attributes) > 0:
             return JsonAttributes(
-                attributes=[JsonAttribute.from_model(a) for a in attributes]
+                attributes=[
+                    JsonAttribute.from_model(a, measures) for a in attributes
+                ]
             )
         else:
             return None
@@ -502,7 +521,9 @@ class JsonComponents(Struct, frozen=True, omit_defaults=True):
     ) -> "JsonComponents":
         """Converts a pysdmx components list to an SDMX-JSON one."""
         dimensions = JsonDimensions.from_model(components.dimensions)
-        attributes = JsonAttributes.from_model(components.attributes)
+        attributes = JsonAttributes.from_model(
+            components.attributes, components.measures
+        )
         measures = JsonMeasures.from_model(components.measures)
         if grps is None:
             groups = []
pysdmx/io/json/sdmxjson2/reader/doc_validation.py CHANGED
@@ -88,6 +88,10 @@ def validate_sdmx_json(input_str: str) -> None:
             lambda m: f"does not match required"
             f" pattern {m.group(1)!r}",
         ),
+        (
+            r"\[\]\s+is\s+too\s+short",
+            lambda _m: "[] should be non-empty",
+        ),
     ]
 
     msg: Optional[str] = next(
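The added pair rewrites the raw jsonschema message `[] is too short` (emitted when an array violates a non-zero `minItems`) into friendlier wording. Roughly how such pattern/formatter pairs are consumed, assuming the surrounding code scans messages with `re.search`, as the `msg = next(...)` line suggests:

```python
import re

patterns = [
    (r"\[\]\s+is\s+too\s+short", lambda _m: "[] should be non-empty"),
]

raw = "[] is too short"
msg = next(
    (fmt(m) for pattern, fmt in patterns if (m := re.search(pattern, raw))),
    raw,
)
print(msg)  # [] should be non-empty
```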
pysdmx/io/xml/__write_data_aux.py CHANGED
@@ -26,6 +26,8 @@ def check_dimension_at_observation(
     for key, value in dimension_at_observation.items():
         if key not in datasets:
             raise Invalid(f"Dataset {key} not found in Message content.")
+        if value == ALL_DIM:
+            continue
         writing_validation(datasets[key])
         dataset = datasets[key]
         components = dataset.structure.components  # type: ignore[union-attr]
@@ -42,31 +44,20 @@ def check_dimension_at_observation(
     return dimension_at_observation
 
 
-def writing_validation(dataset: PandasDataset) -> Schema:
-    """Structural validation of the dataset.
-
-    Args:
-        dataset: The dataset to validate.
-
-    Returns:
-        The `Schema` from the dataset.
-
-    Raises:
-        Invalid: If the structure is not a `Schema` or validation fails.
-    """
+def writing_validation(dataset: PandasDataset) -> None:
+    """Structural validation of the dataset."""
     if not isinstance(dataset.structure, Schema):
         raise Invalid(
             "Dataset Structure is not a Schema. Cannot perform operation."
         )
-    schema = dataset.structure
     required_components = [
         comp.id
-        for comp in schema.components
+        for comp in dataset.structure.components
         if comp.role in (Role.DIMENSION, Role.MEASURE)
     ]
     required_components.extend(
         att.id
-        for att in schema.components.attributes
+        for att in dataset.structure.components.attributes
         if (
             att.required
             and att.attachment_level is not None
@@ -75,7 +66,7 @@ def writing_validation(dataset: PandasDataset) -> Schema:
     )
     non_required = [
         comp.id
-        for comp in schema.components
+        for comp in dataset.structure.components
         if comp.id not in required_components
     ]
     # Columns match components
@@ -91,11 +82,9 @@ def writing_validation(dataset: PandasDataset) -> Schema:
             f"Difference: {', '.join(difference)}"
         )
     # Check if the dataset has at least one dimension and one measure
-    if not schema.components.dimensions:
+    if not dataset.structure.components.dimensions:
         raise Invalid(
             "The dataset structure must have at least one dimension."
         )
-    if not schema.components.measures:
+    if not dataset.structure.components.measures:
         raise Invalid("The dataset structure must have at least one measure.")
-
-    return schema
pysdmx/io/xml/__write_structure_specific_aux.py CHANGED
@@ -1,11 +1,10 @@
 # mypy: disable-error-code="union-attr"
 """Module for writing SDMX-ML 3.0 Structure Specific auxiliary functions."""
 
-from typing import Any, Dict, Hashable, List
+from typing import Any, Dict, List
 
 import pandas as pd
 
-from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
     ABBR_MSG,
@@ -70,6 +69,9 @@ def __write_data_structure_specific(
     outfile = ""
 
     for i, (short_urn, dataset) in enumerate(datasets.items()):
+        dataset.data = dataset.data.astype(str).replace(
+            {"nan": "", "<NA>": ""}
+        )
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -113,12 +115,8 @@ def __write_data_single_dataset(
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
     sdmx_type = parse_short_urn(structure_urn).id
-
-    # Validate structure before writing
-    schema = writing_validation(dataset)
-
     # Remove nan values from DataFrame
-    dataset.data = _fill_na_values(dataset.data, schema)
+    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -141,6 +139,7 @@ def __write_data_single_dataset(
     if dim == ALL_DIM:
         data += __memory_optimization_writing(dataset, prettyprint)
     else:
+        writing_validation(dataset)
         series_codes, obs_codes, group_codes = get_codes(
             dimension_code=dim,
             structure=dataset.structure,  # type: ignore[arg-type]
@@ -231,85 +230,69 @@ def __obs_processing(data: pd.DataFrame, prettyprint: bool = True) -> str:
     return "".join(iterator)
 
 
-def __format_ser_str(
-    data_info: Dict[Any, Any], prettyprint: bool = True
+def __series_processing(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    prettyprint: bool = True,
 ) -> str:
-    """Formats the series as key=value pairs."""
-    child2 = "\t\t" if prettyprint else ""
-    child3 = "\t\t\t" if prettyprint else ""
-    nl = "\n" if prettyprint else ""
+    def __generate_series_str() -> str:
+        """Generates the series item with its observations."""
+        out_list: List[str] = []
+        data.groupby(by=series_codes)[obs_codes].apply(
+            lambda x: __format_dict_ser(out_list, x)
+        )
 
-    out_element = f"{child2}<Series "
+        return "".join(out_list)
 
-    for k, v in data_info.items():
-        if k != "Obs":
-            out_element += f"{k}={__escape_xml(str(v))!r} "
+    def __format_dict_ser(
+        output_list: List[str],
+        obs: Any,
+    ) -> Any:
+        """Formats the series as key=value pairs."""
+        # Creating the observation dict,
+        # we always get the first element on Series
+        # as we are grouping by it
+        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
+        output_list.append(__format_ser_str(data_dict["Series"][0]))
+        # We remove the data for series as it is no longer necessary
+        del data_dict["Series"][0]
 
-    out_element += f">{nl}"
+    def __format_ser_str(data_info: Dict[Any, Any]) -> str:
+        """Formats the series as key=value pairs."""
+        child2 = "\t\t" if prettyprint else ""
+        child3 = "\t\t\t" if prettyprint else ""
+        nl = "\n" if prettyprint else ""
 
-    for obs in data_info["Obs"]:
-        out_element += f"{child3}<Obs "
+        out_element = f"{child2}<Series "
 
-        for k, v in obs.items():
-            out_element += f"{k}={__escape_xml(str(v))!r} "
+        for k, v in data_info.items():
+            if k != "Obs":
+                out_element += f"{k}={__escape_xml(str(v))!r} "
 
-        out_element += f"/>{nl}"
+        out_element += f">{nl}"
+
+        for obs in data_info["Obs"]:
+            out_element += f"{child3}<Obs "
 
-    out_element += f"{child2}</Series>{nl}"
+            for k, v in obs.items():
+                out_element += f"{k}={__escape_xml(str(v))!r} "
 
-    return out_element
+            out_element += f"/>{nl}"
 
+        out_element += f"{child2}</Series>{nl}"
 
-def __build_series_dict(
-    data: pd.DataFrame, series_codes: List[str]
-) -> Dict[str, List[Dict[Hashable, Any]]]:
-    """Build series dictionary from data."""
-    if not series_codes:
-        return {"Series": [{}] if not data.empty else []}
-    return {
+        return out_element
+
+    # Getting each datapoint from data and creating dict
+    data = data.sort_values(series_codes, axis=0)
+    data_dict = {
         "Series": data[series_codes]
         .drop_duplicates()
         .reset_index(drop=True)
         .to_dict(orient="records")
     }
 
+    out = __generate_series_str()
 
-def __process_series_observations(
-    data: pd.DataFrame,
-    series_codes: List[str],
-    obs_codes: List[str],
-    data_dict: Dict[str, List[Dict[Hashable, Any]]],
-    prettyprint: bool = True,
-) -> str:
-    """Process series and their observations into XML string."""
-    out_list: List[str] = []
-
-    def append_series_with_obs(obs: Any) -> str:
-        """Append series with observations to output list."""
-        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
-        result = __format_ser_str(data_dict["Series"][0], prettyprint)
-        out_list.append(result)
-        del data_dict["Series"][0]
-        return result
-
-    if not series_codes:
-        if not data.empty:
-            append_series_with_obs(data[obs_codes])
-    else:
-        data.groupby(by=series_codes)[obs_codes].apply(append_series_with_obs)
-
-    return "".join(out_list)
-
-
-def __series_processing(
-    data: pd.DataFrame,
-    series_codes: List[str],
-    obs_codes: List[str],
-    prettyprint: bool = True,
-) -> str:
-    """Write series to SDMX-ML Structure-Specific format."""
-    data = data.sort_values(series_codes, axis=0)
-    data_dict = __build_series_dict(data, series_codes)
-    return __process_series_observations(
-        data, series_codes, obs_codes, data_dict, prettyprint
-    )
+    return out
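The refactor turns the helpers into closures over a shared `data_dict`: the frame is sorted, the deduplicated series keys are materialized once, and `groupby(...).apply(...)` then visits observations group by group, so consuming `data_dict["Series"][0]` always pairs the current key with its observations (groupby sorts groups by key, matching the sorted, deduplicated key list). The core pattern, reduced to a runnable sketch outside the XML writer (it mirrors the library's reliance on a sorted, single-pass apply):

```python
import pandas as pd

data = pd.DataFrame(
    {
        "FREQ": ["A", "A", "M"],
        "TIME_PERIOD": ["2022", "2023", "2023-01"],
        "OBS_VALUE": [1, 2, 3],
    }
)
series_codes, obs_codes = ["FREQ"], ["TIME_PERIOD", "OBS_VALUE"]

data = data.sort_values(series_codes, axis=0)
series = (
    data[series_codes].drop_duplicates().reset_index(drop=True).to_dict("records")
)

out = []

def emit(obs: pd.DataFrame) -> None:
    key = series.pop(0)  # first remaining key matches the current group
    key["Obs"] = obs.to_dict("records")
    out.append(key)

data.groupby(by=series_codes)[obs_codes].apply(emit)
print(out[0]["FREQ"], len(out[0]["Obs"]))  # A 2
```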
pysdmx/io/xml/sdmx21/writer/generic.py CHANGED
@@ -2,11 +2,10 @@
 """Module for writing SDMX-ML 2.1 Generic data messages."""
 
 from pathlib import Path
-from typing import Any, Dict, Hashable, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 import pandas as pd
 
-from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.format import Format
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
@@ -123,6 +122,7 @@ def __write_data_generic(
 
     for short_urn, dataset in datasets.items():
         writing_validation(dataset)
+        dataset.data = dataset.data.fillna("").astype(str)
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -160,8 +160,7 @@ def __write_data_single_dataset(
     outfile = ""
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
-    schema = writing_validation(dataset)
-    dataset.data = _fill_na_values(dataset.data, schema)
+    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -348,14 +347,9 @@ def __series_processing(
 ) -> str:
     def __generate_series_str() -> str:
         out_list: List[str] = []
-        group_cols = series_codes + series_att_codes
-        if not group_cols:
-            if not data.empty:
-                __format_dict_ser(out_list, data)
-        else:
-            data.groupby(by=group_cols)[data.columns].apply(
-                lambda x: __format_dict_ser(out_list, x)
-            )
+        data.groupby(by=series_codes + series_att_codes)[data.columns].apply(
+            lambda x: __format_dict_ser(out_list, x)
+        )
 
         return "".join(out_list)
 
@@ -365,14 +359,13 @@ def __series_processing(
     ) -> Any:
         obs_data = group_data[obs_codes + obs_att_codes].copy()
         data_dict["Series"][0]["Obs"] = obs_data.to_dict(orient="records")
-        if series_att_codes:
-            data_dict["Series"][0].update(
-                {
-                    k: v
-                    for k, v in group_data[series_att_codes].iloc[0].items()
-                    if k in series_att_codes
-                }
-            )
+        data_dict["Series"][0].update(
+            {
+                k: v
+                for k, v in group_data[series_att_codes].iloc[0].items()
+                if k in series_att_codes
+            }
+        )
         output_list.append(
             __format_ser_str(
                 data_info=data_dict["Series"][0],
@@ -387,17 +380,12 @@ def __series_processing(
 
     # Getting each datapoint from data and creating dict
     data = data.sort_values(series_codes, axis=0)
-    if not series_codes:
-        data_dict: Dict[str, List[Dict[Hashable, Any]]] = {
-            "Series": [{}] if not data.empty else []
-        }
-    else:
-        data_dict = {
-            "Series": data[series_codes]
-            .drop_duplicates()
-            .reset_index(drop=True)
-            .to_dict(orient="records")
-        }
+    data_dict = {
+        "Series": data[series_codes]
+        .drop_duplicates()
+        .reset_index(drop=True)
+        .to_dict(orient="records")
+    }
 
     out = __generate_series_str()
 
pysdmx/model/concept.py CHANGED
@@ -35,8 +35,6 @@ class DataType(str, Enum):
     """True or False."""
     COUNT = "Count"
     """A simple incrementing integer type."""
-    DATA_SET_REFERENCE = "DataSetReference"
-    """Reference to a data set."""
     DATE = "GregorianDay"
     """A ISO 8601 date (e.g. ``2011-06-17``)."""
     DATE_TIME = "DateTime"
@@ -49,24 +47,12 @@ class DataType(str, Enum):
     """A decimal number (8 bytes)."""
     DURATION = "Duration"
     """An ISO 8601 duration."""
-    EXCLUSIVE_VALUE_RANGE = "ExclusiveValueRange"
-    """A range of values excluding boundaries."""
     FLOAT = "Float"
     """A decimal number (4 bytes)."""
-    GEOSPATIAL_INFORMATION = "GeospatialInformation"
-    """Geospatial data format."""
     GREGORIAN_TIME_PERIOD = "GregorianTimePeriod"
     """This is the union of YEAR, YEAR_MONTH, and DATE."""
-    IDENTIFIABLE_REFERENCE = "IdentifiableReference"
-    """Reference to an identifiable object."""
-    INCLUSIVE_VALUE_RANGE = "InclusiveValueRange"
-    """A range of values including boundaries."""
-    INCREMENTAL = "Incremental"
-    """An integer type that increases sequentially."""
     INTEGER = "Integer"
     """A whole number (4 bytes)."""
-    KEY_VALUES = "KeyValues"
-    """Key values reference."""
     LONG = "Long"
     """A whole number (8 bytes)."""
     MONTH = "Month"
@@ -99,8 +85,6 @@ class DataType(str, Enum):
     """A string (as immutable sequence of Unicode code points)."""
     TIME = "Time"
     """An ISO 8601 time (e.g. ``12:50:42``)."""
-    TIMES_RANGE = "TimesRange"
-    """A range of time periods."""
     URI = "URI"
     """A uniform resource identifier, such as a URL."""
     XHTML = "XHTML"
pysdmx/model/dataflow.py CHANGED
@@ -101,10 +101,11 @@ class Component(
     one of: *D* (for Dataset), *O* (for Observation), any string identifying a
     component ID (FREQ) or comma-separated list of component IDs
     (FREQ,REF_AREA). The latter can be used to identify the dimension, group
-    or series to which the attribute is attached. The attachment level of a
-    component may vary with the statistical domain, i.e. a component attached
-    to a series in a particular domain may be attached to, say, the dataset in
-    another domain.
+    or series to which the attribute is attached. It can also be used to
+    identify the measure(s) to which the attribute relates, in case multiple
+    measures are defined. The attachment level of a component may vary with the
+    statistical domain, i.e. a component attached to a series in a particular
+    domain may be attached to, say, the dataset in another domain.
 
     The *codes* field indicates the expected (i.e. allowed) set of values a
     component can take within a particular domain. In addition to
@@ -128,7 +129,9 @@ class Component(
             Attributes can be attached at different levels such as
             D (for dataset-level attributes), O (for observation-level
             attributes) or a combination of dimension IDs, separated by
-            commas, for series- and group-level attributes).
+            commas, for series- and group-level attributes, as well as for
+            attributes attached to one or more measures, when multiple
+            measures are defined).
             A post_init check makes this attribute mandatory for attributes.
         array_def: Any additional constraints for array types.
         urn: The URN of the component.
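To make the expanded semantics concrete, here is a minimal, hypothetical attribute component attached to two measures; the constructor call is a sketch (field names follow the docstring above, but the exact signature may differ):

```python
from pysdmx.model import Component, Concept, Role

# Hypothetical multi-measure structure with TURNOVER and EMPL measures.
note = Component(
    id="OBS_NOTE",
    required=False,
    role=Role.ATTRIBUTE,
    concept=Concept(id="OBS_NOTE"),
    attachment_level="TURNOVER,EMPL",  # relates to both measures
)
```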
pysdmx-1.10.0rc2.dist-info/METADATA → pysdmx-1.10.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pysdmx
-Version: 1.10.0rc2
+Version: 1.10.1
 Summary: Your opinionated Python SDMX library
 License: Apache-2.0
 License-File: LICENSE
pysdmx-1.10.0rc2.dist-info/RECORD → pysdmx-1.10.1.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
 pysdmx/__extras_check.py,sha256=Tmluui2OuJVyJB6a1Jl0PlrRjpsswhtCjAqtRLOSero,2059
-pysdmx/__init__.py,sha256=Q03oQ7Jwt3q1RWCdX2lHnJ8s7cd-qe2_l47m90Dqdgo,71
+pysdmx/__init__.py,sha256=76qTEXwql_RGDSAgomUh23-P8FW79kx6Pcvc8iKZR8s,68
 pysdmx/api/__init__.py,sha256=8lRaF6kEO51ehl0fmW_pHLvkN_34TtEhqhr3oKo6E6g,26
 pysdmx/api/dc/__init__.py,sha256=oPU32X8CRZy4T1to9mO5KMqMwxQsVI424dPqai-I8zI,121
 pysdmx/api/dc/_api.py,sha256=poy1FYFXnF6maBGy5lpOodf32-7QQjH8PCBNDkuOXxQ,7747
@@ -10,12 +10,12 @@ pysdmx/api/dc/query/_parsing_util.py,sha256=pUc5z6sijGmJZsLilAxiPsCSRIO7l2iznzL3
 pysdmx/api/dc/query/_py_parser.py,sha256=_kVUk6Xu5jZdclng1F6eDSZS2-ok_yncI1y1q5lYpBU,1502
 pysdmx/api/dc/query/_sql_parser.py,sha256=vQjhSyt6qA4jAHchkq4XXVhNPtKjKSVLzhoPkUyhJKk,1561
 pysdmx/api/dc/query/util.py,sha256=9HALmvlgVCckaMTVG7sEFbAw_mBwfbL4K-Pac2KPSYw,915
-pysdmx/api/fmr/__init__.py,sha256=-0v6VBGt49k7-TDUMPfu7v6RHn6br10BUiMPneQjQjM,50473
+pysdmx/api/fmr/__init__.py,sha256=uXD2dOY2lJSY6CoracIh8c1_7wWiJcsQ5ebIEIbHumk,50532
 pysdmx/api/fmr/maintenance.py,sha256=AnR1beyL6nsoDM5LmDLXnRMW5JvhGHXTADf_INSNgUg,5920
 pysdmx/api/gds/__init__.py,sha256=BBl75wEdcz9zPMfk6kAGHitRW39S774VL9xifMZ_uHs,11560
 pysdmx/api/qb/__init__.py,sha256=vxdMJFFg4J_KWADrnzj_8KcU0UlwpJPdx0yiW3QJo9Y,1498
 pysdmx/api/qb/availability.py,sha256=2yPHnTXu_jACNKNhhtXMkxVkfLK1Ewa5ucGAbRxvC5o,10181
-pysdmx/api/qb/data.py,sha256=RUcSQBZYkqlrD2ehrGxUWUEAyjbfl5CSgjQ6BwN3hus,22049
+pysdmx/api/qb/data.py,sha256=hQayPQnN_ebBgIoC1xDei2uTbyv0UsoM4qyZyGaNz78,22143
 pysdmx/api/qb/gds.py,sha256=Z2KhP6m09_oWI2CbYRhlTsx8VLC-_UZaQEOEqX94SOw,4975
 pysdmx/api/qb/refmeta.py,sha256=--h0QOvaLGILT_6GBAZ2Ld5aqAELEW-PTsHNcj7YoG8,10677
 pysdmx/api/qb/registration.py,sha256=IURlmXcXQi8e-w5YXCgRNs07EQJZJ2bOdZb7M_k5iZ8,7132
@@ -25,19 +25,18 @@ pysdmx/api/qb/structure.py,sha256=0m_Fmp410Rfjdv0ehLS8ivwccwn-xfBkcIdYayu8pgg,17
 pysdmx/api/qb/util.py,sha256=at2Sb2kVltSTDc1gKiqG6HtIFhjSx-Msbe--wCvRbQI,3667
 pysdmx/errors.py,sha256=9bfujYykzfGMa1TuUOmH9QqghnZGOo556fvbKH2jFa8,3295
 pysdmx/io/__init__.py,sha256=96ZCY1PfcWp_q2Nlo2tHJAK31sH_b05v9UkbR0vGdg0,180
-pysdmx/io/_pd_utils.py,sha256=NgGhjn9clB0xD_3hsOzwXci8ix9dLe2Lt2DAZ9Tkyzw,2555
 pysdmx/io/csv/__csv_aux_reader.py,sha256=2RGzhga_VDnh2OVX_Bo-rR2hgAQxHXa7zt7-D5MVBu0,3994
-pysdmx/io/csv/__csv_aux_writer.py,sha256=4PlnMuzv_XUoJnZCS8GjqzTpjsSf79bmD9KTmTH24KE,4779
+pysdmx/io/csv/__csv_aux_writer.py,sha256=lCPl6hpyp12xhXlyimRlxxOT33RcgXDGTQZb8JguksI,4118
 pysdmx/io/csv/__init__.py,sha256=53f2rPkwILigOqArgRQOOwLk-ha6zVTe4EypIsR7K6I,107
 pysdmx/io/csv/sdmx10/__init__.py,sha256=NAAm_yodK-gzkuzewGQeYpF3f5nZmDA4vWGfT2KGTWc,38
-pysdmx/io/csv/sdmx10/reader/__init__.py,sha256=l7nK6Q-VpOhd_VFYNxvd05b8GxyNQXpI7aBPQYvy4LM,2733
-pysdmx/io/csv/sdmx10/writer/__init__.py,sha256=_6u3ANN84JW1wb-9YpgzKSXcpMdUBRmDtFK2fVG_r7I,2959
+pysdmx/io/csv/sdmx10/reader/__init__.py,sha256=uGc-sv4YXHteVQZPTdkVUkVZ6iKY7h7Fg56dw7VZ2UU,2735
+pysdmx/io/csv/sdmx10/writer/__init__.py,sha256=d-kLcP711k1nmG_D4whDxqWCzODRT7HTqk95N-jXBK8,2923
 pysdmx/io/csv/sdmx20/__init__.py,sha256=6_YCb4iuUWJRS9y0KSdf4ebNKblSlnTTzNC5c19kNk8,38
-pysdmx/io/csv/sdmx20/reader/__init__.py,sha256=DJMLkE4YKlBCxK4R38R3JXkd3pfiYtNa9HB8PIim0cQ,2844
-pysdmx/io/csv/sdmx20/writer/__init__.py,sha256=vaeaRT1qtAl3dkkzL2EeCrkpLmeL_r3ivqqakVDxAn0,2456
+pysdmx/io/csv/sdmx20/reader/__init__.py,sha256=PmVXd8QXvXej6XSDAPsIc8VptLk69NK37-wunHH7Pvc,2846
+pysdmx/io/csv/sdmx20/writer/__init__.py,sha256=puksYRzcog3wv9JGWA--6rvv9aRAn86Vsv1CyA7Em-c,2489
 pysdmx/io/csv/sdmx21/__init__.py,sha256=I3_dwi4A4if62_mwEjqbOa-F7mhoIMf0D6szpDf3W7c,38
-pysdmx/io/csv/sdmx21/reader/__init__.py,sha256=hoXkOJM8prZZ6QESG3ZjReN2P-8pGryN6CPeEYtrqjw,2864
-pysdmx/io/csv/sdmx21/writer/__init__.py,sha256=B-0ZYFxUm1cmv0-rwDaE4TmIE0fnAeojNPIXXP2ATXQ,2451
+pysdmx/io/csv/sdmx21/reader/__init__.py,sha256=J1cCkZh3klgZZWjdQ_U1zkfzT_DVzQmdreGZhN33SLs,2866
+pysdmx/io/csv/sdmx21/writer/__init__.py,sha256=CH8Nm7hqvXyN6XM_D2nJRmbKj6CJV-X1QzSF0WJrs0E,2484
 pysdmx/io/format.py,sha256=EO-PyYpiU0WswvEGA5UHokladxPezcwBUo1AJTqxp1Q,5250
 pysdmx/io/input_processor.py,sha256=P1_jKegrOyV7EaZLjLrq8fX2u1EI7gPBJoKvlBCNkP0,6967
 pysdmx/io/json/fusion/messages/__init__.py,sha256=ac2jWfjGGBcfoSutiKy68LzqwNp_clt2RzmJOaYCxL0,2142
@@ -47,7 +46,7 @@ pysdmx/io/json/fusion/messages/concept.py,sha256=m4lTyncSIriFXWWupE-zLxUVPx3Xrg5
 pysdmx/io/json/fusion/messages/constraint.py,sha256=dPkzhCWN49Y9ReSZPRFTdM6GWc0rU2BZTyFfWsqlX34,615
 pysdmx/io/json/fusion/messages/core.py,sha256=GdzF3TNUGrB0gxuaaSpk9LaYqcdy_M6L2azExZQfM0Q,4843
 pysdmx/io/json/fusion/messages/dataflow.py,sha256=lsaMPjmA-KiM51I78wrONfNHyvfBSeAll5Sp0jmTezc,2972
-pysdmx/io/json/fusion/messages/dsd.py,sha256=9QO0IzDSa3sHRTt6tvRAE1tAieugxB55tJ0KmeHf230,10136
+pysdmx/io/json/fusion/messages/dsd.py,sha256=hnVOY0GZSuYQFCZd-hcjGd_YEacUue_9S4YjpSJJigM,10341
 pysdmx/io/json/fusion/messages/map.py,sha256=TPsCFuUfk5Jhhe7CNvEoHuFNZFpHhvNiYFWeIEUx-sc,7695
 pysdmx/io/json/fusion/messages/metadataflow.py,sha256=Js4j8lUF9ZwqL7lJUrfrjk9tmBmRQPt8qxdrfnZ6R5E,1374
 pysdmx/io/json/fusion/messages/mpa.py,sha256=WvcHn7Pa_UBHxkZbfSzIxc8qeeMfTWThxgCRHuioXFY,1494
@@ -73,7 +72,7 @@ pysdmx/io/json/sdmxjson2/messages/concept.py,sha256=x7BoG6AaziZGNjxeypwy_lsFTmdH
 pysdmx/io/json/sdmxjson2/messages/constraint.py,sha256=TCg-Z6ZkZSzlhjvaZebKk1wL_CPhmJzyKjEkE4FPkAc,9055
 pysdmx/io/json/sdmxjson2/messages/core.py,sha256=qF0fscWY1yRxmN-4s2UweEWqbDMSioaUPaxlYEo8ouY,10137
 pysdmx/io/json/sdmxjson2/messages/dataflow.py,sha256=wjeq9yexTa012AtGdZsZflp3WQ6fP-3kas-gxADTFeQ,6256
-pysdmx/io/json/sdmxjson2/messages/dsd.py,sha256=CMnFZ97oXEJEIO-OnX-uZkSpPN-f5E6jbJo_q8WrYHE,19854
+pysdmx/io/json/sdmxjson2/messages/dsd.py,sha256=ZLFISJYtpP4cPQwcIjMkEGVPBE4Yy6cCKvwGw0xjFh4,20431
 pysdmx/io/json/sdmxjson2/messages/map.py,sha256=ZB7XPX6nUcu2MMHAsgwAR0nxlbEQF7YasplhlS5K9EI,16774
 pysdmx/io/json/sdmxjson2/messages/metadataflow.py,sha256=1hKCyzTEAvB_MOgmtjBObC9RVNSge7Sick6nQMwl17o,2994
 pysdmx/io/json/sdmxjson2/messages/mpa.py,sha256=ryoQCvOvd2j6gPdGOEML4bc-NXUSetuKNOfmd9Ogn2s,3050
@@ -85,7 +84,7 @@ pysdmx/io/json/sdmxjson2/messages/schema.py,sha256=JwFYjgvhK_1NN5KQIUYNb0ul4ywQh
 pysdmx/io/json/sdmxjson2/messages/structure.py,sha256=c0dyTJK49UpGvL1iLFaFLYFkT89kzvXwk65qd_j-Y1U,12738
 pysdmx/io/json/sdmxjson2/messages/vtl.py,sha256=C-JQY1_W8SrJd2lLdUGCmQO9Br1pdqdT8WmB1K4e_yY,35284
 pysdmx/io/json/sdmxjson2/reader/__init__.py,sha256=RbNnZSrGQa4OE0HBWJau9tPFSQbDklcKZaBWOzxEw4I,1629
-pysdmx/io/json/sdmxjson2/reader/doc_validation.py,sha256=PsY_VEJOuEtXj7pRgabiEbWBSWjTlK2oN-ayU0XIXC4,3680
+pysdmx/io/json/sdmxjson2/reader/doc_validation.py,sha256=Li0N5UmTKJy_FWT5vfwt7DEtp8xiYaXccDRBna_Q0Jw,3822
 pysdmx/io/json/sdmxjson2/reader/metadata.py,sha256=FT9CEWjrVfUK4vTEqs-f2ZO6jWeRRkEHCjsHMNgKQp0,1230
 pysdmx/io/json/sdmxjson2/reader/structure.py,sha256=PKHvH_lY2XJtKkg5rGbGSHyYu_raGLrBuaEy8BKr6U0,1209
 pysdmx/io/json/sdmxjson2/writer/__init__.py,sha256=DZGkas1ghei4p6SZsIQI1LPToS-d8F1Nx75MC8reT7g,270
@@ -104,8 +103,8 @@ pysdmx/io/xml/__structure_aux_reader.py,sha256=50UPOCk2XnCU4J1hQNAXiGL2n8QPXdf4z
 pysdmx/io/xml/__structure_aux_writer.py,sha256=0i08hvFw2TfRElaGAeTwfsOaOpw8XWBlZ_zWdxmLPkM,45612
 pysdmx/io/xml/__tokens.py,sha256=M0x-tgoh6_pzL_BP-MArCu3w0NO-AUS6bR-W6BIEJG8,6891
 pysdmx/io/xml/__write_aux.py,sha256=c3HgDMey8nBXyeT_yU8PWdk25bgYyX49R21fLv8CgZc,15534
-pysdmx/io/xml/__write_data_aux.py,sha256=ebuqtz97wa7scNM7cO0A_Cr40TXmRm3qqYbOjALj6wY,3547
-pysdmx/io/xml/__write_structure_specific_aux.py,sha256=zNep8HYFYnBYjOKZZa7PWRhc60rnRMmnfJh39QMaPtg,9292
+pysdmx/io/xml/__write_data_aux.py,sha256=mnJpooA4koqGbKhF-6eonT3drQ-qCuwwP-lfZLHKqVQ,3403
+pysdmx/io/xml/__write_structure_specific_aux.py,sha256=reRDVw4Xwag0ODyMzm9uOk9WJ_e1ELxAPYHSMUUDJBQ,8919
 pysdmx/io/xml/config.py,sha256=R24cczVkzkhjVLXpv-qfEm88W3_QTqVt2Qofi8IvJ5Y,93
 pysdmx/io/xml/doc_validation.py,sha256=WXDhte96VEAeZMMHJ0Y68WW8HEoOhEiOYEnbGP5Zwjw,1795
 pysdmx/io/xml/header.py,sha256=My03uhWD3AkfTwfUqiblmLIZuqd7uvIEYsOial6TClg,5971
@@ -118,7 +117,7 @@ pysdmx/io/xml/sdmx21/reader/structure_specific.py,sha256=S3-gLmaBFjBRIr25qQtlrao
 pysdmx/io/xml/sdmx21/reader/submission.py,sha256=8daiBW-sIVGaB6lYwHqJNkLI7IixMSydCK-0ZO8ri4I,1711
 pysdmx/io/xml/sdmx21/writer/__init__.py,sha256=QQGFAss26njCC4eKYxhBcI9LYm5NHuJaAJGKCrIrL80,31
 pysdmx/io/xml/sdmx21/writer/error.py,sha256=0wkX7K_n2oZNkOKg_zpl9Id82qP72Lqof-T-ZLGoZ1M,353
-pysdmx/io/xml/sdmx21/writer/generic.py,sha256=_ouKoVndG7Jj8_EAvUXPK1RbDKdKemh0kSRaDPFjTHo,16214
+pysdmx/io/xml/sdmx21/writer/generic.py,sha256=8_kUMMUiIFVdKMyhpR2LKDyfiinBohO_aL_6GZpOInY,15786
 pysdmx/io/xml/sdmx21/writer/structure.py,sha256=S3qoNgXxrakn2V4NLdL5U5mAA16XisI0PpJDuxqalFE,2084
 pysdmx/io/xml/sdmx21/writer/structure_specific.py,sha256=iXc1J-RzoKyRznvgGgdTSeUfyqZLouI8CtSq2YhGBWI,2877
 pysdmx/io/xml/sdmx30/__init__.py,sha256=8BScJFEgLy8DoUreu2RBUtxjGjKyClkKBI_Qtarbk-Y,38
@@ -140,9 +139,9 @@ pysdmx/model/__base.py,sha256=M1O-uT8RqeKZpGT09HD6ifjPl0F9ORxlRYra3fn8qCM,15182
 pysdmx/model/__init__.py,sha256=UPZtum_PF-nPPQa21Bq1doUXLCoU-yRGPh45ZXdUI9k,5493
 pysdmx/model/category.py,sha256=ksYIOGPHgZI619RhmRXZUXHP_juY9im40tWzR2yuMEc,6796
 pysdmx/model/code.py,sha256=Wu6rEXeZf_XA0aBrDXgN-3yvySAHH7SAjrWliFlmC24,12799
-pysdmx/model/concept.py,sha256=aEVUZVeS2NUzvQ0MZLjFT7iKRFHzhH6oC2vmH7kHLXI,10325
+pysdmx/model/concept.py,sha256=mQfqJdtWc10WdTKX_Mw7Znw65cN3QO-kCar9MWYeWO4,9645
 pysdmx/model/constraint.py,sha256=MwI_GLKzwkuo0BzAsgcnDeB-b7bq8qqwHNte5JjCEFA,1960
-pysdmx/model/dataflow.py,sha256=9EzGn-EDm1OQa52N5ep8VApZoj7lHWfIs-W5tnBP9FY,23954
+pysdmx/model/dataflow.py,sha256=IyVBWIKSkl6Qm5R2a6n_uPveUPsIpaYUMc_4hcwJ4Uw,24196
 pysdmx/model/dataset.py,sha256=Lbr7tYonGHD3NZUD-M9hK2puaEAluOVPG2DbkOohzMM,4861
 pysdmx/model/gds.py,sha256=QrnmI8Hn--C95gGXCeUeWwhn-Ur7DuT08Cg7oPJIEVI,4976
 pysdmx/model/map.py,sha256=9a3hl6efq_5kAYuJWkepoQOkao8Eqk17N69JGyRfxsk,17506
@@ -164,7 +163,7 @@ pysdmx/util/__init__.py,sha256=m_XWRAmVJ7F6ai4Ckrj_YuPbhg3cJZAXeZrEThrL88k,3997
 pysdmx/util/_date_pattern_map.py,sha256=IS1qONwVHbTBNIFCT0Rqbijj2a9mYvs7onXSK6GeQAQ,1620
 pysdmx/util/_model_utils.py,sha256=nQ1yWBt-tZYDios9xvRvJ7tHq6A8_RoGdY1wi7WGz2w,3793
 pysdmx/util/_net_utils.py,sha256=nOTz_x3FgFrwKh42_J70IqYXz9duQkMFJWtejZXcLHs,1326
-pysdmx-1.10.0rc2.dist-info/METADATA,sha256=ljfxmPVZvRL2pcDovZhz986lhAuMwF6mlwgm1qEoA14,4852
-pysdmx-1.10.0rc2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-pysdmx-1.10.0rc2.dist-info/licenses/LICENSE,sha256=3XTNDPtv2RxDUNkQzn9MIWit2u7_Ob5daMLEq-4pBJs,649
-pysdmx-1.10.0rc2.dist-info/RECORD,,
+pysdmx-1.10.1.dist-info/METADATA,sha256=tXCxkMOnWoEJWTBRH4ZvzO1k20lm9cxDIneSSfQ8pzE,4849
+pysdmx-1.10.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+pysdmx-1.10.1.dist-info/licenses/LICENSE,sha256=3XTNDPtv2RxDUNkQzn9MIWit2u7_Ob5daMLEq-4pBJs,649
+pysdmx-1.10.1.dist-info/RECORD,,
pysdmx/io/_pd_utils.py DELETED
@@ -1,83 +0,0 @@
-import pandas as pd
-
-from pysdmx.errors import Invalid
-from pysdmx.model.concept import DataType
-from pysdmx.model.dataflow import Schema
-
-NUMERIC_TYPES = {
-    DataType.BIG_INTEGER,
-    DataType.COUNT,
-    DataType.DECIMAL,
-    DataType.DOUBLE,
-    DataType.FLOAT,
-    DataType.INCREMENTAL,
-    DataType.INTEGER,
-    DataType.LONG,
-    DataType.SHORT,
-}
-
-
-def _fill_na_values(data: pd.DataFrame, structure: Schema) -> pd.DataFrame:
-    """Fills missing values in the DataFrame based on the component type.
-
-    Numeric components are filled with "NaN".
-    Other components are filled with "#N/A".
-    If the structure does not have components,
-    all missing values are filled with "".
-
-    Args:
-        data: The DataFrame to fill.
-        structure: The structure definition (´Schema´).
-
-    Returns:
-        The DataFrame with filled missing values.
-
-    Raises:
-        Invalid: If the structure does not have components.
-    """
-    for component in structure.components:
-        if component.id in data.columns:
-            if component.dtype in NUMERIC_TYPES:
-                data[component.id] = (
-                    data[component.id].astype(object).fillna("NaN")
-                )
-            else:
-                data[component.id] = (
-                    data[component.id].astype(object).fillna("#N/A")
-                )
-
-    return data
-
-
-def _validate_explicit_null_values(
-    data: pd.DataFrame, structure: Schema
-) -> None:
-    """Validates that explicit null values are correct for the component type.
-
-    Numeric components must not contain "#N/A".
-    Non-numeric components must not contain "NaN".
-
-    Args:
-        data: The DataFrame to validate.
-        structure: The structure definition (´Schema´).
-
-    Raises:
-        Invalid: If invalid null values are found.
-    """
-    for component in structure.components:
-        if component.id in data.columns:
-            series = data[component.id].astype(str)
-            if component.dtype in NUMERIC_TYPES:
-                # Numeric: #N/A is invalid
-                if series.isin(["#N/A"]).any():
-                    raise Invalid(
-                        f"Invalid null value '#N/A' in numeric component "
-                        f"'{component.id}'."
-                    )
-            else:
-                # Non-numeric: NaN is invalid
-                if series.isin(["NaN"]).any():
-                    raise Invalid(
-                        f"Invalid null value 'NaN' in non-numeric component "
-                        f"'{component.id}'."
-                    )