pysdmx 1.10.0rc1-py3-none-any.whl → 1.10.0rc2-py3-none-any.whl

This diff compares two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (33)
  1. pysdmx/__init__.py +1 -1
  2. pysdmx/api/fmr/__init__.py +3 -2
  3. pysdmx/io/_pd_utils.py +83 -0
  4. pysdmx/io/csv/__csv_aux_writer.py +23 -0
  5. pysdmx/io/csv/sdmx10/reader/__init__.py +1 -1
  6. pysdmx/io/csv/sdmx10/writer/__init__.py +15 -9
  7. pysdmx/io/csv/sdmx20/reader/__init__.py +1 -1
  8. pysdmx/io/csv/sdmx20/writer/__init__.py +1 -1
  9. pysdmx/io/csv/sdmx21/reader/__init__.py +1 -1
  10. pysdmx/io/csv/sdmx21/writer/__init__.py +1 -1
  11. pysdmx/io/json/sdmxjson2/messages/__init__.py +4 -0
  12. pysdmx/io/json/sdmxjson2/messages/code.py +16 -6
  13. pysdmx/io/json/sdmxjson2/messages/constraint.py +235 -16
  14. pysdmx/io/json/sdmxjson2/messages/dsd.py +35 -7
  15. pysdmx/io/json/sdmxjson2/messages/map.py +5 -4
  16. pysdmx/io/json/sdmxjson2/messages/metadataflow.py +1 -0
  17. pysdmx/io/json/sdmxjson2/messages/msd.py +18 -10
  18. pysdmx/io/json/sdmxjson2/messages/schema.py +2 -2
  19. pysdmx/io/json/sdmxjson2/messages/structure.py +81 -44
  20. pysdmx/io/json/sdmxjson2/messages/vtl.py +13 -9
  21. pysdmx/io/xml/__write_data_aux.py +20 -7
  22. pysdmx/io/xml/__write_structure_specific_aux.py +71 -54
  23. pysdmx/io/xml/sdmx21/writer/generic.py +31 -19
  24. pysdmx/model/__base.py +46 -1
  25. pysdmx/model/__init__.py +18 -0
  26. pysdmx/model/category.py +17 -0
  27. pysdmx/model/concept.py +16 -0
  28. pysdmx/model/constraint.py +69 -0
  29. pysdmx/model/message.py +80 -71
  30. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/METADATA +1 -1
  31. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/RECORD +33 -31
  32. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/WHEEL +0 -0
  33. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/licenses/LICENSE +0 -0
pysdmx/io/xml/__write_structure_specific_aux.py CHANGED
@@ -1,10 +1,11 @@
 # mypy: disable-error-code="union-attr"
 """Module for writing SDMX-ML 3.0 Structure Specific auxiliary functions."""
 
-from typing import Any, Dict, List
+from typing import Any, Dict, Hashable, List
 
 import pandas as pd
 
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
     ABBR_MSG,
@@ -69,9 +70,6 @@ def __write_data_structure_specific(
     outfile = ""
 
     for i, (short_urn, dataset) in enumerate(datasets.items()):
-        dataset.data = dataset.data.astype(str).replace(
-            {"nan": "", "<NA>": ""}
-        )
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -115,8 +113,12 @@ def __write_data_single_dataset(
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
     sdmx_type = parse_short_urn(structure_urn).id
+
+    # Validate structure before writing
+    schema = writing_validation(dataset)
+
     # Remove nan values from DataFrame
-    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
+    dataset.data = _fill_na_values(dataset.data, schema)
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -139,7 +141,6 @@ def __write_data_single_dataset(
     if dim == ALL_DIM:
         data += __memory_optimization_writing(dataset, prettyprint)
     else:
-        writing_validation(dataset)
        series_codes, obs_codes, group_codes = get_codes(
            dimension_code=dim,
            structure=dataset.structure,  # type: ignore[arg-type]
@@ -230,69 +231,85 @@ def __obs_processing(data: pd.DataFrame, prettyprint: bool = True) -> str:
     return "".join(iterator)
 
 
-def __series_processing(
-    data: pd.DataFrame,
-    series_codes: List[str],
-    obs_codes: List[str],
-    prettyprint: bool = True,
+def __format_ser_str(
+    data_info: Dict[Any, Any], prettyprint: bool = True
 ) -> str:
-    def __generate_series_str() -> str:
-        """Generates the series item with its observations."""
-        out_list: List[str] = []
-        data.groupby(by=series_codes)[obs_codes].apply(
-            lambda x: __format_dict_ser(out_list, x)
-        )
-
-        return "".join(out_list)
-
-    def __format_dict_ser(
-        output_list: List[str],
-        obs: Any,
-    ) -> Any:
-        """Formats the series as key=value pairs."""
-        # Creating the observation dict,
-        # we always get the first element on Series
-        # as we are grouping by it
-        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
-        output_list.append(__format_ser_str(data_dict["Series"][0]))
-        # We remove the data for series as it is no longer necessary
-        del data_dict["Series"][0]
+    """Formats the series as key=value pairs."""
+    child2 = "\t\t" if prettyprint else ""
+    child3 = "\t\t\t" if prettyprint else ""
+    nl = "\n" if prettyprint else ""
 
-    def __format_ser_str(data_info: Dict[Any, Any]) -> str:
-        """Formats the series as key=value pairs."""
-        child2 = "\t\t" if prettyprint else ""
-        child3 = "\t\t\t" if prettyprint else ""
-        nl = "\n" if prettyprint else ""
+    out_element = f"{child2}<Series "
 
-        out_element = f"{child2}<Series "
+    for k, v in data_info.items():
+        if k != "Obs":
+            out_element += f"{k}={__escape_xml(str(v))!r} "
 
-        for k, v in data_info.items():
-            if k != "Obs":
-                out_element += f"{k}={__escape_xml(str(v))!r} "
+    out_element += f">{nl}"
 
-        out_element += f">{nl}"
+    for obs in data_info["Obs"]:
+        out_element += f"{child3}<Obs "
 
-        for obs in data_info["Obs"]:
-            out_element += f"{child3}<Obs "
+        for k, v in obs.items():
+            out_element += f"{k}={__escape_xml(str(v))!r} "
 
-            for k, v in obs.items():
-                out_element += f"{k}={__escape_xml(str(v))!r} "
+        out_element += f"/>{nl}"
 
-            out_element += f"/>{nl}"
+    out_element += f"{child2}</Series>{nl}"
 
-        out_element += f"{child2}</Series>{nl}"
+    return out_element
 
-        return out_element
 
-    # Getting each datapoint from data and creating dict
-    data = data.sort_values(series_codes, axis=0)
-    data_dict = {
+def __build_series_dict(
+    data: pd.DataFrame, series_codes: List[str]
+) -> Dict[str, List[Dict[Hashable, Any]]]:
+    """Build series dictionary from data."""
+    if not series_codes:
+        return {"Series": [{}] if not data.empty else []}
+    return {
         "Series": data[series_codes]
         .drop_duplicates()
         .reset_index(drop=True)
        .to_dict(orient="records")
    }
 
-    out = __generate_series_str()
 
-    return out
+def __process_series_observations(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    data_dict: Dict[str, List[Dict[Hashable, Any]]],
+    prettyprint: bool = True,
+) -> str:
+    """Process series and their observations into XML string."""
+    out_list: List[str] = []
+
+    def append_series_with_obs(obs: Any) -> str:
+        """Append series with observations to output list."""
+        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
+        result = __format_ser_str(data_dict["Series"][0], prettyprint)
+        out_list.append(result)
+        del data_dict["Series"][0]
+        return result
+
+    if not series_codes:
+        if not data.empty:
+            append_series_with_obs(data[obs_codes])
+    else:
+        data.groupby(by=series_codes)[obs_codes].apply(append_series_with_obs)
+
+    return "".join(out_list)
+
+
+def __series_processing(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    prettyprint: bool = True,
+) -> str:
+    """Write series to SDMX-ML Structure-Specific format."""
+    data = data.sort_values(series_codes, axis=0)
+    data_dict = __build_series_dict(data, series_codes)
+    return __process_series_observations(
        data, series_codes, obs_codes, data_dict, prettyprint
    )
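The new `if not series_codes` branches in both this writer and the generic one exist because pandas refuses to group by an empty key list, which is exactly what happens for a dataset whose components are all at observation level. A minimal standalone check (the DataFrame and column names here are illustrative, not from pysdmx):

```python
import pandas as pd

df = pd.DataFrame({"TIME_PERIOD": ["2020", "2021"], "OBS_VALUE": ["1.2", "3.4"]})

# Grouping by an empty key list raises in pandas:
try:
    df.groupby(by=[])
except ValueError as err:
    print(err)  # "No group keys passed!"

# Hence the rc2 fallback: with no series codes, the whole frame becomes
# the observations of a single, key-less series.
series = [df.to_dict(orient="records")] if not df.empty else []
print(len(series))  # 1
```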
pysdmx/io/xml/sdmx21/writer/generic.py CHANGED
@@ -2,10 +2,11 @@
 """Module for writing SDMX-ML 2.1 Generic data messages."""
 
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, Hashable, List, Optional, Sequence, Tuple, Union
 
 import pandas as pd
 
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.format import Format
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
@@ -122,7 +123,6 @@ def __write_data_generic(
 
     for short_urn, dataset in datasets.items():
         writing_validation(dataset)
-        dataset.data = dataset.data.fillna("").astype(str)
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -160,7 +160,8 @@ def __write_data_single_dataset(
     outfile = ""
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
-    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
+    schema = writing_validation(dataset)
+    dataset.data = _fill_na_values(dataset.data, schema)
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -347,9 +348,14 @@ def __series_processing(
 ) -> str:
     def __generate_series_str() -> str:
         out_list: List[str] = []
-        data.groupby(by=series_codes + series_att_codes)[data.columns].apply(
-            lambda x: __format_dict_ser(out_list, x)
-        )
+        group_cols = series_codes + series_att_codes
+        if not group_cols:
+            if not data.empty:
+                __format_dict_ser(out_list, data)
+        else:
+            data.groupby(by=group_cols)[data.columns].apply(
+                lambda x: __format_dict_ser(out_list, x)
+            )
 
         return "".join(out_list)
 
@@ -359,13 +365,14 @@ def __series_processing(
     ) -> Any:
         obs_data = group_data[obs_codes + obs_att_codes].copy()
         data_dict["Series"][0]["Obs"] = obs_data.to_dict(orient="records")
-        data_dict["Series"][0].update(
-            {
-                k: v
-                for k, v in group_data[series_att_codes].iloc[0].items()
-                if k in series_att_codes
-            }
-        )
+        if series_att_codes:
+            data_dict["Series"][0].update(
+                {
+                    k: v
+                    for k, v in group_data[series_att_codes].iloc[0].items()
+                    if k in series_att_codes
+                }
+            )
         output_list.append(
             __format_ser_str(
                 data_info=data_dict["Series"][0],
@@ -380,12 +387,17 @@ def __series_processing(
 
     # Getting each datapoint from data and creating dict
     data = data.sort_values(series_codes, axis=0)
-    data_dict = {
-        "Series": data[series_codes]
-        .drop_duplicates()
-        .reset_index(drop=True)
-        .to_dict(orient="records")
-    }
+    if not series_codes:
+        data_dict: Dict[str, List[Dict[Hashable, Any]]] = {
+            "Series": [{}] if not data.empty else []
+        }
+    else:
+        data_dict = {
+            "Series": data[series_codes]
+            .drop_duplicates()
+            .reset_index(drop=True)
+            .to_dict(orient="records")
+        }
 
     out = __generate_series_str()
 
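Both writers now delegate NaN handling to `_fill_na_values` from the new `pysdmx/io/_pd_utils.py` (+83 lines, body not shown in this diff). Judging from the call sites it replaces, a plausible sketch of its behaviour, with the schema-aware part being an assumption:

```python
from typing import Any

import pandas as pd


def fill_na_values_sketch(data: pd.DataFrame, schema: Any) -> pd.DataFrame:
    """Hypothetical stand-in for pysdmx.io._pd_utils._fill_na_values.

    The removed call sites did ``fillna("").astype(str).replace("nan", "")``,
    so the helper presumably blanks out missing values before serialization,
    with ``schema`` available to treat typed components differently.
    """
    return data.fillna("").astype(str).replace({"nan": "", "<NA>": ""})
```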
pysdmx/model/__base.py CHANGED
@@ -1,5 +1,6 @@
+import re
 from datetime import datetime
-from typing import Any, Optional, Sequence, Union
+from typing import Any, Literal, Optional, Sequence, Union
 
 from msgspec import Struct
 
@@ -327,6 +328,50 @@ class ItemScheme(MaintainableArtefact, frozen=True, omit_defaults=True):
     items: Sequence[Item] = ()
     is_partial: bool = False
 
+    def search(
+        self,
+        query: str,
+        use_regex: bool = False,
+        fields: Literal["name", "description", "all"] = "all",
+    ) -> Sequence[Item]:
+        """Search for items matching the query.
+
+        Args:
+            query: The substring or regex pattern to search for.
+            use_regex: Whether to treat the query as a regex (default: False).
+            fields: The fields to search in (default: all textual fields).
+
+        Returns:
+            Items that match the query.
+        """
+        if not query:
+            raise Invalid(
+                "Invalid search", "The query string cannot be empty."
+            )
+
+        # Determine which fields to search in
+        search_fields = (
+            ["name", "description"] if fields == "all" else [fields]
+        )
+
+        # Transform plain text queries into a regex
+        if not use_regex:
+            query = re.escape(query)
+
+        pattern = re.compile(query, re.IGNORECASE if not use_regex else 0)
+
+        all_items = getattr(self, "all_items", "")
+        items = all_items if all_items else self.items
+
+        return [
+            item  # type: ignore[misc]
+            for item in items
+            if any(
+                pattern.search(str(getattr(item, field, "")))
+                for field in search_fields
+            )
+        ]
+
 
 class DataflowRef(
     Struct, frozen=True, omit_defaults=True, repr_omit_defaults=True, tag=True
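The new `ItemScheme.search` gives every item scheme (codelists, concept schemes, etc.) a case-insensitive text search; note that regex queries are compiled without `re.IGNORECASE`, so they stay case-sensitive. A usage sketch (the `Codelist`/`Code` constructor arguments shown are assumptions about the model's fields):

```python
from pysdmx.model import Code, Codelist

freq = Codelist(
    id="CL_FREQ",
    name="Frequency",
    agency="SDMX",
    items=[
        Code(id="A", name="Annual"),
        Code(id="M", name="Monthly", description="Monthly data"),
    ],
)

# Plain queries: case-insensitive substring over name and description.
print([c.id for c in freq.search("month")])  # ['M']

# Regex queries: case-sensitive, restricted here to the name field.
print([c.id for c in freq.search(r"^Ann", use_regex=True, fields="name")])  # ['A']
```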
pysdmx/model/__init__.py CHANGED
@@ -29,6 +29,16 @@ from pysdmx.model.code import (
     HierarchyAssociation,
 )
 from pysdmx.model.concept import Concept, ConceptScheme, DataType, Facets
+from pysdmx.model.constraint import (
+    ConstraintAttachment,
+    CubeKeyValue,
+    CubeRegion,
+    CubeValue,
+    DataConstraint,
+    DataKey,
+    DataKeyValue,
+    KeySet,
+)
 from pysdmx.model.dataflow import (
     ArrayBoundaries,
     Component,
@@ -161,9 +171,16 @@ __all__ = [
     "ComponentMap",
     "Concept",
     "ConceptScheme",
+    "ConstraintAttachment",
     "Contact",
+    "CubeKeyValue",
+    "CubeRegion",
+    "CubeValue",
     "DataConsumer",
     "DataConsumerScheme",
+    "DataConstraint",
+    "DataKey",
+    "DataKeyValue",
     "Dataflow",
     "DataflowInfo",
     "DataflowRef",
@@ -180,6 +197,7 @@ __all__ = [
     "HierarchyAssociation",
     "ImplicitComponentMap",
     "ItemReference",
+    "KeySet",
     "MetadataAttribute",
     "MetadataComponent",
     "Metadataflow",
pysdmx/model/category.py CHANGED
@@ -91,6 +91,15 @@ class CategoryScheme(ItemScheme, frozen=True, omit_defaults=True):
             flows.update(self.__extract_flows(cat))
         return list(flows)
 
+    @property
+    def all_items(self) -> Sequence[Category]:
+        """Get all the categories in the category scheme as a flat list.
+
+        Returns:
+            A flat list of all the categories present in the category scheme.
+        """
+        return self.__get_categories(self.categories)
+
     def __iter__(self) -> Iterator[Category]:
         """Return an iterator over the list of categories."""
         yield from self.categories
@@ -160,6 +169,14 @@ class CategoryScheme(ItemScheme, frozen=True, omit_defaults=True):
             processed_output.append(f"{attr}: {value}")
         return f"{', '.join(processed_output)}"
 
+    def __get_categories(self, cats: Sequence[Category]) -> Sequence[Category]:
+        out = []
+        for cat in cats:
+            out.append(cat)
+            if cat.categories:
+                out.extend(self.__get_categories(cat.categories))
+        return out
+
 
 class Categorisation(
     MaintainableArtefact, frozen=True, omit_defaults=True, kw_only=True
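`all_items` is what makes the inherited `search` work across nested categories: `ItemScheme.search` looks for an `all_items` attribute before falling back to `items`, and `CategoryScheme` now flattens its category tree through the recursive `__get_categories` helper. A sketch (the constructor fields used here are assumptions):

```python
from pysdmx.model import Category, CategoryScheme

cs = CategoryScheme(
    id="TOPICS",
    agency="TEST",
    items=[
        Category(
            id="ECO",
            name="Economy",
            categories=[Category(id="PRICES", name="Prices")],
        )
    ],
)

print([c.id for c in cs.all_items])        # ['ECO', 'PRICES'] (flattened)
print([c.id for c in cs.search("price")])  # ['PRICES'] via all_items
```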
pysdmx/model/concept.py CHANGED
@@ -35,6 +35,8 @@ class DataType(str, Enum):
     """True or False."""
     COUNT = "Count"
     """A simple incrementing integer type."""
+    DATA_SET_REFERENCE = "DataSetReference"
+    """Reference to a data set."""
     DATE = "GregorianDay"
     """An ISO 8601 date (e.g. ``2011-06-17``)."""
     DATE_TIME = "DateTime"
@@ -47,12 +49,24 @@ class DataType(str, Enum):
     """A decimal number (8 bytes)."""
     DURATION = "Duration"
     """An ISO 8601 duration."""
+    EXCLUSIVE_VALUE_RANGE = "ExclusiveValueRange"
+    """A range of values excluding boundaries."""
     FLOAT = "Float"
     """A decimal number (4 bytes)."""
+    GEOSPATIAL_INFORMATION = "GeospatialInformation"
+    """Geospatial data format."""
     GREGORIAN_TIME_PERIOD = "GregorianTimePeriod"
     """This is the union of YEAR, YEAR_MONTH, and DATE."""
+    IDENTIFIABLE_REFERENCE = "IdentifiableReference"
+    """Reference to an identifiable object."""
+    INCLUSIVE_VALUE_RANGE = "InclusiveValueRange"
+    """A range of values including boundaries."""
+    INCREMENTAL = "Incremental"
+    """An integer type that increases sequentially."""
     INTEGER = "Integer"
     """A whole number (4 bytes)."""
+    KEY_VALUES = "KeyValues"
+    """Key values reference."""
     LONG = "Long"
     """A whole number (8 bytes)."""
     MONTH = "Month"
@@ -85,6 +99,8 @@ class DataType(str, Enum):
     """A string (as immutable sequence of Unicode code points)."""
     TIME = "Time"
     """An ISO 8601 time (e.g. ``12:50:42``)."""
+    TIMES_RANGE = "TimesRange"
+    """A range of time periods."""
     URI = "URI"
     """A uniform resource identifier, such as a URL."""
     XHTML = "XHTML"
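Since `DataType` is a `str`-valued enum, each new member compares equal to, and round-trips from, its SDMX type name:

```python
from pysdmx.model import DataType

assert DataType.TIMES_RANGE == "TimesRange"
assert DataType("KeyValues") is DataType.KEY_VALUES
```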
pysdmx/model/constraint.py ADDED
@@ -0,0 +1,69 @@
+"""Model for SDMX Data Constraints."""
+
+from datetime import datetime
+from typing import Optional, Sequence
+
+from msgspec import Struct
+
+from pysdmx.model.__base import MaintainableArtefact
+
+
+class CubeValue(Struct, frozen=True, omit_defaults=True):
+    """A value of the cube, with optional business validity."""
+
+    value: str
+    valid_from: Optional[datetime] = None
+    valid_to: Optional[datetime] = None
+
+
+class CubeKeyValue(Struct, frozen=True, omit_defaults=True):
+    """The list of values for a cube's component."""
+
+    id: str
+    values: Sequence[CubeValue]
+
+
+class CubeRegion(Struct, frozen=True, omit_defaults=True):
+    """A cube region, with its associated values (by default, included)."""
+
+    key_values: Sequence[CubeKeyValue]
+    is_included: bool = True
+
+
+class ConstraintAttachment(Struct, frozen=True, omit_defaults=True):
+    """The artefacts to which the data constraint is attached."""
+
+    data_provider: Optional[str]
+    data_structures: Optional[Sequence[str]] = None
+    dataflows: Optional[Sequence[str]] = None
+    provision_agreements: Optional[Sequence[str]] = None
+
+
+class DataKeyValue(Struct, frozen=True, omit_defaults=True):
+    """A key value, i.e. a component of the key (e.g. FREQ=M)."""
+
+    id: str
+    value: str
+
+
+class DataKey(Struct, frozen=True, omit_defaults=True):
+    """A data key, i.e. one value per dimension in the data key."""
+
+    keys_values: Sequence[DataKeyValue]
+    valid_from: Optional[datetime] = None
+    valid_to: Optional[datetime] = None
+
+
+class KeySet(Struct, frozen=True, omit_defaults=True):
+    """A set of keys, included by default."""
+
+    keys: Sequence[DataKey]
+    is_included: bool
+
+
+class DataConstraint(MaintainableArtefact, frozen=True, omit_defaults=True):
+    """A data constraint, defining the allowed or available values."""
+
+    constraint_attachment: Optional[ConstraintAttachment] = None
+    cube_regions: Sequence[CubeRegion] = ()
+    key_sets: Sequence[KeySet] = ()
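A construction sketch for the new constraint classes (the `id`/`name`/`agency` keywords come from `MaintainableArtefact`; the exact set of required fields is an assumption, and the dataflow URN is illustrative):

```python
from pysdmx.model import (
    ConstraintAttachment,
    CubeKeyValue,
    CubeRegion,
    CubeValue,
    DataConstraint,
)

# Allowed content: only annual and monthly frequencies for one dataflow.
constraint = DataConstraint(
    id="CON_FREQ",
    name="Allowed frequencies",
    agency="TEST",
    constraint_attachment=ConstraintAttachment(
        data_provider=None,  # required field, no default
        dataflows=["Dataflow=TEST:DF_EXAMPLE(1.0)"],
    ),
    cube_regions=[
        CubeRegion(
            key_values=[
                CubeKeyValue(
                    id="FREQ",
                    values=[CubeValue(value="A"), CubeValue(value="M")],
                )
            ],
        )
    ],
)
print(constraint.cube_regions[0].key_values[0].id)  # FREQ
```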