pysdmx 1.10.0rc1-py3-none-any.whl → 1.10.0rc2-py3-none-any.whl

This diff compares two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (33)
  1. pysdmx/__init__.py +1 -1
  2. pysdmx/api/fmr/__init__.py +3 -2
  3. pysdmx/io/_pd_utils.py +83 -0
  4. pysdmx/io/csv/__csv_aux_writer.py +23 -0
  5. pysdmx/io/csv/sdmx10/reader/__init__.py +1 -1
  6. pysdmx/io/csv/sdmx10/writer/__init__.py +15 -9
  7. pysdmx/io/csv/sdmx20/reader/__init__.py +1 -1
  8. pysdmx/io/csv/sdmx20/writer/__init__.py +1 -1
  9. pysdmx/io/csv/sdmx21/reader/__init__.py +1 -1
  10. pysdmx/io/csv/sdmx21/writer/__init__.py +1 -1
  11. pysdmx/io/json/sdmxjson2/messages/__init__.py +4 -0
  12. pysdmx/io/json/sdmxjson2/messages/code.py +16 -6
  13. pysdmx/io/json/sdmxjson2/messages/constraint.py +235 -16
  14. pysdmx/io/json/sdmxjson2/messages/dsd.py +35 -7
  15. pysdmx/io/json/sdmxjson2/messages/map.py +5 -4
  16. pysdmx/io/json/sdmxjson2/messages/metadataflow.py +1 -0
  17. pysdmx/io/json/sdmxjson2/messages/msd.py +18 -10
  18. pysdmx/io/json/sdmxjson2/messages/schema.py +2 -2
  19. pysdmx/io/json/sdmxjson2/messages/structure.py +81 -44
  20. pysdmx/io/json/sdmxjson2/messages/vtl.py +13 -9
  21. pysdmx/io/xml/__write_data_aux.py +20 -7
  22. pysdmx/io/xml/__write_structure_specific_aux.py +71 -54
  23. pysdmx/io/xml/sdmx21/writer/generic.py +31 -19
  24. pysdmx/model/__base.py +46 -1
  25. pysdmx/model/__init__.py +18 -0
  26. pysdmx/model/category.py +17 -0
  27. pysdmx/model/concept.py +16 -0
  28. pysdmx/model/constraint.py +69 -0
  29. pysdmx/model/message.py +80 -71
  30. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/METADATA +1 -1
  31. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/RECORD +33 -31
  32. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/WHEEL +0 -0
  33. {pysdmx-1.10.0rc1.dist-info → pysdmx-1.10.0rc2.dist-info}/licenses/LICENSE +0 -0
pysdmx/io/xml/__write_structure_specific_aux.py CHANGED
@@ -1,10 +1,11 @@
 # mypy: disable-error-code="union-attr"
 """Module for writing SDMX-ML 3.0 Structure Specific auxiliary functions."""
 
-from typing import Any, Dict, List
+from typing import Any, Dict, Hashable, List
 
 import pandas as pd
 
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
     ABBR_MSG,
@@ -69,9 +70,6 @@ def __write_data_structure_specific(
     outfile = ""
 
     for i, (short_urn, dataset) in enumerate(datasets.items()):
-        dataset.data = dataset.data.astype(str).replace(
-            {"nan": "", "<NA>": ""}
-        )
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -115,8 +113,12 @@ def __write_data_single_dataset(
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
     sdmx_type = parse_short_urn(structure_urn).id
+
+    # Validate structure before writing
+    schema = writing_validation(dataset)
+
     # Remove nan values from DataFrame
-    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
+    dataset.data = _fill_na_values(dataset.data, schema)
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -139,7 +141,6 @@ def __write_data_single_dataset(
     if dim == ALL_DIM:
         data += __memory_optimization_writing(dataset, prettyprint)
     else:
-        writing_validation(dataset)
        series_codes, obs_codes, group_codes = get_codes(
            dimension_code=dim,
            structure=dataset.structure,  # type: ignore[arg-type]
@@ -230,69 +231,85 @@ def __obs_processing(data: pd.DataFrame, prettyprint: bool = True) -> str:
     return "".join(iterator)
 
 
-def __series_processing(
-    data: pd.DataFrame,
-    series_codes: List[str],
-    obs_codes: List[str],
-    prettyprint: bool = True,
+def __format_ser_str(
+    data_info: Dict[Any, Any], prettyprint: bool = True
 ) -> str:
-    def __generate_series_str() -> str:
-        """Generates the series item with its observations."""
-        out_list: List[str] = []
-        data.groupby(by=series_codes)[obs_codes].apply(
-            lambda x: __format_dict_ser(out_list, x)
-        )
-
-        return "".join(out_list)
-
-    def __format_dict_ser(
-        output_list: List[str],
-        obs: Any,
-    ) -> Any:
-        """Formats the series as key=value pairs."""
-        # Creating the observation dict,
-        # we always get the first element on Series
-        # as we are grouping by it
-        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
-        output_list.append(__format_ser_str(data_dict["Series"][0]))
-        # We remove the data for series as it is no longer necessary
-        del data_dict["Series"][0]
+    """Formats the series as key=value pairs."""
+    child2 = "\t\t" if prettyprint else ""
+    child3 = "\t\t\t" if prettyprint else ""
+    nl = "\n" if prettyprint else ""
 
-    def __format_ser_str(data_info: Dict[Any, Any]) -> str:
-        """Formats the series as key=value pairs."""
-        child2 = "\t\t" if prettyprint else ""
-        child3 = "\t\t\t" if prettyprint else ""
-        nl = "\n" if prettyprint else ""
+    out_element = f"{child2}<Series "
 
-        out_element = f"{child2}<Series "
+    for k, v in data_info.items():
+        if k != "Obs":
+            out_element += f"{k}={__escape_xml(str(v))!r} "
 
-        for k, v in data_info.items():
-            if k != "Obs":
-                out_element += f"{k}={__escape_xml(str(v))!r} "
+    out_element += f">{nl}"
 
-        out_element += f">{nl}"
+    for obs in data_info["Obs"]:
+        out_element += f"{child3}<Obs "
 
-        for obs in data_info["Obs"]:
-            out_element += f"{child3}<Obs "
+        for k, v in obs.items():
+            out_element += f"{k}={__escape_xml(str(v))!r} "
 
-            for k, v in obs.items():
-                out_element += f"{k}={__escape_xml(str(v))!r} "
+        out_element += f"/>{nl}"
 
-            out_element += f"/>{nl}"
+    out_element += f"{child2}</Series>{nl}"
 
-        out_element += f"{child2}</Series>{nl}"
+    return out_element
 
-        return out_element
 
-    # Getting each datapoint from data and creating dict
-    data = data.sort_values(series_codes, axis=0)
-    data_dict = {
+def __build_series_dict(
+    data: pd.DataFrame, series_codes: List[str]
+) -> Dict[str, List[Dict[Hashable, Any]]]:
+    """Build series dictionary from data."""
+    if not series_codes:
+        return {"Series": [{}] if not data.empty else []}
+    return {
         "Series": data[series_codes]
         .drop_duplicates()
         .reset_index(drop=True)
        .to_dict(orient="records")
    }
 
-    out = __generate_series_str()
 
-    return out
+def __process_series_observations(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    data_dict: Dict[str, List[Dict[Hashable, Any]]],
+    prettyprint: bool = True,
+) -> str:
+    """Process series and their observations into XML string."""
+    out_list: List[str] = []
+
+    def append_series_with_obs(obs: Any) -> str:
+        """Append series with observations to output list."""
+        data_dict["Series"][0]["Obs"] = obs.to_dict(orient="records")
+        result = __format_ser_str(data_dict["Series"][0], prettyprint)
+        out_list.append(result)
+        del data_dict["Series"][0]
+        return result
+
+    if not series_codes:
+        if not data.empty:
+            append_series_with_obs(data[obs_codes])
+    else:
+        data.groupby(by=series_codes)[obs_codes].apply(append_series_with_obs)
+
+    return "".join(out_list)
+
+
+def __series_processing(
+    data: pd.DataFrame,
+    series_codes: List[str],
+    obs_codes: List[str],
+    prettyprint: bool = True,
+) -> str:
+    """Write series to SDMX-ML Structure-Specific format."""
+    data = data.sort_values(series_codes, axis=0)
+    data_dict = __build_series_dict(data, series_codes)
+    return __process_series_observations(
        data, series_codes, obs_codes, data_dict, prettyprint
    )
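The new `if not series_codes` branches in both this writer and the generic one exist because pandas refuses to group by an empty key list, which is exactly what happens for a dataset whose components are all at observation level. A minimal standalone check (the DataFrame and column names here are illustrative, not from pysdmx):

```python
import pandas as pd

df = pd.DataFrame({"TIME_PERIOD": ["2020", "2021"], "OBS_VALUE": ["1.2", "3.4"]})

# Grouping by an empty key list raises in pandas:
try:
    df.groupby(by=[])
except ValueError as err:
    print(err)  # "No group keys passed!"

# Hence the rc2 fallback: with no series codes, the whole frame becomes
# the observations of a single, key-less series.
series = [df.to_dict(orient="records")] if not df.empty else []
print(len(series))  # 1
```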
pysdmx/io/xml/sdmx21/writer/generic.py CHANGED
@@ -2,10 +2,11 @@
 """Module for writing SDMX-ML 2.1 Generic data messages."""
 
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, Hashable, List, Optional, Sequence, Tuple, Union
 
 import pandas as pd
 
+from pysdmx.io._pd_utils import _fill_na_values
 from pysdmx.io.format import Format
 from pysdmx.io.pd import PandasDataset
 from pysdmx.io.xml.__write_aux import (
@@ -122,7 +123,6 @@ def __write_data_generic(
 
     for short_urn, dataset in datasets.items():
         writing_validation(dataset)
-        dataset.data = dataset.data.fillna("").astype(str)
         outfile += __write_data_single_dataset(
             dataset=dataset,
             prettyprint=prettyprint,
@@ -160,7 +160,8 @@ def __write_data_single_dataset(
     outfile = ""
     structure_urn = get_structure(dataset)
     id_structure = parse_short_urn(structure_urn).id
-    dataset.data = dataset.data.fillna("").astype(str).replace("nan", "")
+    schema = writing_validation(dataset)
+    dataset.data = _fill_na_values(dataset.data, schema)
 
     nl = "\n" if prettyprint else ""
     child1 = "\t" if prettyprint else ""
@@ -347,9 +348,14 @@ def __series_processing(
 ) -> str:
     def __generate_series_str() -> str:
         out_list: List[str] = []
-        data.groupby(by=series_codes + series_att_codes)[data.columns].apply(
-            lambda x: __format_dict_ser(out_list, x)
-        )
+        group_cols = series_codes + series_att_codes
+        if not group_cols:
+            if not data.empty:
+                __format_dict_ser(out_list, data)
+        else:
+            data.groupby(by=group_cols)[data.columns].apply(
+                lambda x: __format_dict_ser(out_list, x)
+            )
 
         return "".join(out_list)
 
@@ -359,13 +365,14 @@ def __series_processing(
     ) -> Any:
         obs_data = group_data[obs_codes + obs_att_codes].copy()
         data_dict["Series"][0]["Obs"] = obs_data.to_dict(orient="records")
-        data_dict["Series"][0].update(
-            {
-                k: v
-                for k, v in group_data[series_att_codes].iloc[0].items()
-                if k in series_att_codes
-            }
-        )
+        if series_att_codes:
+            data_dict["Series"][0].update(
+                {
+                    k: v
+                    for k, v in group_data[series_att_codes].iloc[0].items()
+                    if k in series_att_codes
+                }
+            )
         output_list.append(
             __format_ser_str(
                 data_info=data_dict["Series"][0],
@@ -380,12 +387,17 @@ def __series_processing(
 
     # Getting each datapoint from data and creating dict
     data = data.sort_values(series_codes, axis=0)
-    data_dict = {
-        "Series": data[series_codes]
-        .drop_duplicates()
-        .reset_index(drop=True)
-        .to_dict(orient="records")
-    }
+    if not series_codes:
+        data_dict: Dict[str, List[Dict[Hashable, Any]]] = {
+            "Series": [{}] if not data.empty else []
+        }
+    else:
+        data_dict = {
+            "Series": data[series_codes]
+            .drop_duplicates()
+            .reset_index(drop=True)
+            .to_dict(orient="records")
+        }
 
     out = __generate_series_str()
 
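Both writers now delegate NaN handling to `_fill_na_values` from the new `pysdmx/io/_pd_utils.py` (+83 lines, body not shown in this diff). Judging from the call sites it replaces, a plausible sketch of its behaviour, with the schema-aware part being an assumption:

```python
from typing import Any

import pandas as pd


def fill_na_values_sketch(data: pd.DataFrame, schema: Any) -> pd.DataFrame:
    """Hypothetical stand-in for pysdmx.io._pd_utils._fill_na_values.

    The removed call sites did ``fillna("").astype(str).replace("nan", "")``,
    so the helper presumably blanks out missing values before serialization,
    with ``schema`` available to treat typed components differently.
    """
    return data.fillna("").astype(str).replace({"nan": "", "<NA>": ""})
```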
pysdmx/model/__base.py CHANGED
@@ -1,5 +1,6 @@
+import re
 from datetime import datetime
-from typing import Any, Optional, Sequence, Union
+from typing import Any, Literal, Optional, Sequence, Union
 
 from msgspec import Struct
 
@@ -327,6 +328,50 @@ class ItemScheme(MaintainableArtefact, frozen=True, omit_defaults=True):
     items: Sequence[Item] = ()
     is_partial: bool = False
 
+    def search(
+        self,
+        query: str,
+        use_regex: bool = False,
+        fields: Literal["name", "description", "all"] = "all",
+    ) -> Sequence[Item]:
+        """Search for items matching the query.
+
+        Args:
+            query: The substring or regex pattern to search for.
+            use_regex: Whether to treat the query as a regex (default: False).
+            fields: The fields to search in (default: all textual fields).
+
+        Returns:
+            Items that match the query.
+        """
+        if not query:
+            raise Invalid(
+                "Invalid search", "The query string cannot be empty."
+            )
+
+        # Determine which fields to search in
+        search_fields = (
+            ["name", "description"] if fields == "all" else [fields]
+        )
+
+        # Transform plain text queries into a regex
+        if not use_regex:
+            query = re.escape(query)
+
+        pattern = re.compile(query, re.IGNORECASE if not use_regex else 0)
+
+        all_items = getattr(self, "all_items", "")
+        items = all_items if all_items else self.items
+
+        return [
+            item  # type: ignore[misc]
+            for item in items
+            if any(
+                pattern.search(str(getattr(item, field, "")))
+                for field in search_fields
+            )
+        ]
+
 
 class DataflowRef(
     Struct, frozen=True, omit_defaults=True, repr_omit_defaults=True, tag=True
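The new `ItemScheme.search` gives every item scheme (codelists, concept schemes, etc.) a case-insensitive text search; note that regex queries are compiled without `re.IGNORECASE`, so they stay case-sensitive. A usage sketch (the `Codelist`/`Code` constructor arguments shown are assumptions about the model's fields):

```python
from pysdmx.model import Code, Codelist

freq = Codelist(
    id="CL_FREQ",
    name="Frequency",
    agency="SDMX",
    items=[
        Code(id="A", name="Annual"),
        Code(id="M", name="Monthly", description="Monthly data"),
    ],
)

# Plain queries: case-insensitive substring over name and description.
print([c.id for c in freq.search("month")])  # ['M']

# Regex queries: case-sensitive, restricted here to the name field.
print([c.id for c in freq.search(r"^Ann", use_regex=True, fields="name")])  # ['A']
```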
pysdmx/model/__init__.py CHANGED
@@ -29,6 +29,16 @@ from pysdmx.model.code import (
     HierarchyAssociation,
 )
 from pysdmx.model.concept import Concept, ConceptScheme, DataType, Facets
+from pysdmx.model.constraint import (
+    ConstraintAttachment,
+    CubeKeyValue,
+    CubeRegion,
+    CubeValue,
+    DataConstraint,
+    DataKey,
+    DataKeyValue,
+    KeySet,
+)
 from pysdmx.model.dataflow import (
     ArrayBoundaries,
     Component,
@@ -161,9 +171,16 @@ __all__ = [
     "ComponentMap",
     "Concept",
     "ConceptScheme",
+    "ConstraintAttachment",
     "Contact",
+    "CubeKeyValue",
+    "CubeRegion",
+    "CubeValue",
     "DataConsumer",
     "DataConsumerScheme",
+    "DataConstraint",
+    "DataKey",
+    "DataKeyValue",
     "Dataflow",
     "DataflowInfo",
     "DataflowRef",
@@ -180,6 +197,7 @@ __all__ = [
     "HierarchyAssociation",
     "ImplicitComponentMap",
     "ItemReference",
+    "KeySet",
     "MetadataAttribute",
     "MetadataComponent",
     "Metadataflow",
pysdmx/model/category.py CHANGED
@@ -91,6 +91,15 @@ class CategoryScheme(ItemScheme, frozen=True, omit_defaults=True):
             flows.update(self.__extract_flows(cat))
         return list(flows)
 
+    @property
+    def all_items(self) -> Sequence[Category]:
+        """Get all the categories in the category scheme as a flat list.
+
+        Returns:
+            A flat list of all the categories present in the category scheme.
+        """
+        return self.__get_categories(self.categories)
+
     def __iter__(self) -> Iterator[Category]:
         """Return an iterator over the list of categories."""
         yield from self.categories
@@ -160,6 +169,14 @@ class CategoryScheme(ItemScheme, frozen=True, omit_defaults=True):
             processed_output.append(f"{attr}: {value}")
         return f"{', '.join(processed_output)}"
 
+    def __get_categories(self, cats: Sequence[Category]) -> Sequence[Category]:
+        out = []
+        for cat in cats:
+            out.append(cat)
+            if cat.categories:
+                out.extend(self.__get_categories(cat.categories))
+        return out
+
 
 class Categorisation(
     MaintainableArtefact, frozen=True, omit_defaults=True, kw_only=True
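`all_items` is what makes the inherited `search` work across nested categories: `ItemScheme.search` looks for an `all_items` attribute before falling back to `items`, and `CategoryScheme` now flattens its category tree through the recursive `__get_categories` helper. A sketch (the constructor fields used here are assumptions):

```python
from pysdmx.model import Category, CategoryScheme

cs = CategoryScheme(
    id="TOPICS",
    agency="TEST",
    items=[
        Category(
            id="ECO",
            name="Economy",
            categories=[Category(id="PRICES", name="Prices")],
        )
    ],
)

print([c.id for c in cs.all_items])        # ['ECO', 'PRICES'] (flattened)
print([c.id for c in cs.search("price")])  # ['PRICES'] via all_items
```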
pysdmx/model/concept.py CHANGED
@@ -35,6 +35,8 @@ class DataType(str, Enum):
     """True or False."""
     COUNT = "Count"
     """A simple incrementing integer type."""
+    DATA_SET_REFERENCE = "DataSetReference"
+    """Reference to a data set."""
     DATE = "GregorianDay"
     """An ISO 8601 date (e.g. ``2011-06-17``)."""
     DATE_TIME = "DateTime"
@@ -47,12 +49,24 @@ class DataType(str, Enum):
     """A decimal number (8 bytes)."""
     DURATION = "Duration"
     """An ISO 8601 duration."""
+    EXCLUSIVE_VALUE_RANGE = "ExclusiveValueRange"
+    """A range of values excluding boundaries."""
     FLOAT = "Float"
     """A decimal number (4 bytes)."""
+    GEOSPATIAL_INFORMATION = "GeospatialInformation"
+    """Geospatial data format."""
     GREGORIAN_TIME_PERIOD = "GregorianTimePeriod"
     """This is the union of YEAR, YEAR_MONTH, and DATE."""
+    IDENTIFIABLE_REFERENCE = "IdentifiableReference"
+    """Reference to an identifiable object."""
+    INCLUSIVE_VALUE_RANGE = "InclusiveValueRange"
+    """A range of values including boundaries."""
+    INCREMENTAL = "Incremental"
+    """An integer type that increases sequentially."""
     INTEGER = "Integer"
     """A whole number (4 bytes)."""
+    KEY_VALUES = "KeyValues"
+    """Key values reference."""
     LONG = "Long"
     """A whole number (8 bytes)."""
     MONTH = "Month"
@@ -85,6 +99,8 @@ class DataType(str, Enum):
     """A string (as immutable sequence of Unicode code points)."""
     TIME = "Time"
     """An ISO 8601 time (e.g. ``12:50:42``)."""
+    TIMES_RANGE = "TimesRange"
+    """A range of time periods."""
     URI = "URI"
     """A uniform resource identifier, such as a URL."""
     XHTML = "XHTML"
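Since `DataType` is a `str`-valued enum, each new member compares equal to, and round-trips from, its SDMX type name:

```python
from pysdmx.model import DataType

assert DataType.TIMES_RANGE == "TimesRange"
assert DataType("KeyValues") is DataType.KEY_VALUES
```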
pysdmx/model/constraint.py ADDED
@@ -0,0 +1,69 @@
+"""Model for SDMX Data Constraints."""
+
+from datetime import datetime
+from typing import Optional, Sequence
+
+from msgspec import Struct
+
+from pysdmx.model.__base import MaintainableArtefact
+
+
+class CubeValue(Struct, frozen=True, omit_defaults=True):
+    """A value of the cube, with optional business validity."""
+
+    value: str
+    valid_from: Optional[datetime] = None
+    valid_to: Optional[datetime] = None
+
+
+class CubeKeyValue(Struct, frozen=True, omit_defaults=True):
+    """The list of values for a cube's component."""
+
+    id: str
+    values: Sequence[CubeValue]
+
+
+class CubeRegion(Struct, frozen=True, omit_defaults=True):
+    """A cube region, with its associated values (by default, included)."""
+
+    key_values: Sequence[CubeKeyValue]
+    is_included: bool = True
+
+
+class ConstraintAttachment(Struct, frozen=True, omit_defaults=True):
+    """The artefacts to which the data constraint is attached."""
+
+    data_provider: Optional[str]
+    data_structures: Optional[Sequence[str]] = None
+    dataflows: Optional[Sequence[str]] = None
+    provision_agreements: Optional[Sequence[str]] = None
+
+
+class DataKeyValue(Struct, frozen=True, omit_defaults=True):
+    """A key value, i.e. a component of the key (e.g. FREQ=M)."""
+
+    id: str
+    value: str
+
+
+class DataKey(Struct, frozen=True, omit_defaults=True):
+    """A data key, i.e. one value per dimension in the data key."""
+
+    keys_values: Sequence[DataKeyValue]
+    valid_from: Optional[datetime] = None
+    valid_to: Optional[datetime] = None
+
+
+class KeySet(Struct, frozen=True, omit_defaults=True):
+    """A set of keys, included by default."""
+
+    keys: Sequence[DataKey]
+    is_included: bool
+
+
+class DataConstraint(MaintainableArtefact, frozen=True, omit_defaults=True):
+    """A data constraint, defining the allowed or available values."""
+
+    constraint_attachment: Optional[ConstraintAttachment] = None
+    cube_regions: Sequence[CubeRegion] = ()
+    key_sets: Sequence[KeySet] = ()
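A construction sketch for the new constraint classes (the `id`/`name`/`agency` keywords come from `MaintainableArtefact`; the exact set of required fields is an assumption, and the dataflow URN is illustrative):

```python
from pysdmx.model import (
    ConstraintAttachment,
    CubeKeyValue,
    CubeRegion,
    CubeValue,
    DataConstraint,
)

# Allowed content: only annual and monthly frequencies for one dataflow.
constraint = DataConstraint(
    id="CON_FREQ",
    name="Allowed frequencies",
    agency="TEST",
    constraint_attachment=ConstraintAttachment(
        data_provider=None,  # required field, no default
        dataflows=["Dataflow=TEST:DF_EXAMPLE(1.0)"],
    ),
    cube_regions=[
        CubeRegion(
            key_values=[
                CubeKeyValue(
                    id="FREQ",
                    values=[CubeValue(value="A"), CubeValue(value="M")],
                )
            ],
        )
    ],
)
print(constraint.cube_regions[0].key_values[0].id)  # FREQ
```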