PyPI - notoecd - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

notoecd 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

notoecd/__init__.py +1 -1
notoecd/calls.py +61 -8
notoecd/datasets.py +4 -3
notoecd/structure.py +5 -1
{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/METADATA +3 -3
notoecd-0.1.5.dist-info/RECORD +9 -0
notoecd-0.1.4.dist-info/RECORD +0 -9
{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/WHEEL +0 -0
{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/licenses/LICENSE +0 -0
{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/top_level.txt +0 -0

notoecd/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.1.4"
+__version__ = "0.1.5"
 from .calls import get_df
 from .structure import get_structure

notoecd/calls.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import pandas as pd
-from typing import Union
 from functools import lru_cache
+from typing import Union, Optional
 from .structure import get_structure
 @lru_cache(maxsize=64)
@@ -12,26 +12,58 @@ def _clean(s: str) -> str:
     return str(s).strip().lower()
+def _clean_dict(d: dict) -> dict:
+    out = {}
+    for k, v in d.items():
+        k = _clean(k)
+        if isinstance(v, (list, tuple, set)):
+            out[k] = [_clean(x) for x in v]
+        else:
+            out[k] = _clean(v)
+    return out
 def _build_filter_expression(
     agencyID: str,
     dataflowID: str,
     filters: dict,
 ) -> str:
+    """
+    Builds a valid OECD SDMX filter expression from a dictionary.
-    s = get_structure(agencyID, dataflowID)
-    filters = {_clean(k): v for k, v in filters.items()}
+    Args:
+        agencyID (str): The data provider agency identifier.
+        dataflowID (str): The dataflow identifier within the agency.
+        filters (dict): Dictionary with dimension names as keys and
+            either codes or labels as values.
+    Returns:
+        str: A valid OECD SDMX filter expression.
+    """
+    s = get_structure(agencyID, dataflowID)
+    filters = _clean_dict(filters)
     parts = []
     for dim in s.toc.title:
         dim_key = _clean(dim)
         if dim_key in filters:
             val = filters[dim_key]
-            if isinstance(val, str):
+            concepts = _clean_dict(s.explain_vals(dim_key))
+            rev = {v: k for k, v in concepts.items()}
+            if isinstance(val, str):
                 val = [val]
-            parts.append("+".join(_clean(v) for v in val))
+            val = [_clean(v) for v in val]
+            for i, v in enumerate(val):
+                if v in concepts: continue
+                if v in rev: val[i] = rev[v]
+                else: raise ValueError(f"Invalid value '{v}' for dimension '{dim_key}'. ")
+            parts.append("+".join(val))
         else:
             parts.append("")
     return ".".join(parts).upper()
@@ -40,8 +72,26 @@ def get_df(
     dataflowID: str,
     filters: Union[str, dict] = "",
     version: str = "",
+    startYear: Optional[int] = None,
+    endYear: Optional[int] = None,
 ) -> pd.DataFrame:
+    """
+    Fetch data from the OECD SDMX API and return it as a pandas DataFrame.
+    Args:
+        agencyID (str): The data provider agency identifier.
+        dataflowID (str): The dataflow identifier within the agency.
+        filters (Union[str, dict], optional): Either a preformatted SDMX filter
+            string or a dictionary of filters.
+        version (str, optional): The dataflow version. Use an empty string for
+            the latest version.
+        startYear (int, optional): Start year (inclusive).
+        endYear (int, optional): End year (inclusive).
+    Returns:
+        pd.DataFrame: The resulting dataset.
+    """
     if isinstance(filters, dict):
         filter_expression = _build_filter_expression(agencyID, dataflowID, filters)
     else:
@@ -50,8 +100,11 @@ def get_df(
     url = (
         f"https://sdmx.oecd.org/public/rest/data/"
         f"{agencyID},{dataflowID},{version}/{filter_expression}"
-        "?dimensionAtObservation=AllDimensions&format=csvfile"
+        f"?dimensionAtObservation=AllDimensions&format=csvfile"
     )
+    if startYear is not None: url += f"&startPeriod={startYear}"
+    if endYear is not None: url += f"&endPeriod={endYear}"
     base_df = _fetch_df(url)
     return base_df.copy()

notoecd/datasets.py CHANGED Viewed

@@ -68,7 +68,7 @@ def search_keywords(*keywords: str) -> pd.DataFrame:
     Searches OECD datasets for a set of keywords.
     Args:
-        keywords (str | list[str]): Single keyword or list of keywords. Acts as OR.
+        *keywords (str): One or more keywords. Acts as OR.
     Returns:
         pd.DataFrame: Matching rows.
@@ -89,13 +89,13 @@ def search_keywords(*keywords: str) -> pd.DataFrame:
             )
         )
-    # Combined normalized text for each row
     text = (
         datasets["name"].fillna("").astype(str)
         + " "
         + datasets["description"].fillna("").astype(str)
     )
     text_norm = _normalize_series(text)
+    name_norm = _normalize_series(datasets["name"])
     def _normalize_kw(kw: str) -> str:
         kw = unicodedata.normalize("NFKD", kw.lower())
@@ -108,8 +108,9 @@ def search_keywords(*keywords: str) -> pd.DataFrame:
     for kw in norm_keywords:
         m = text_norm.str.contains(kw, na=False, regex=False)
+        mt = name_norm.str.contains(kw, na=False, regex=False)
         overall_mask |= m
-        score = score.add(m.astype("int8"), fill_value=0)
+        score = score.add(m.astype("int8"), fill_value=0) + mt.astype("int8")
     result = datasets.loc[overall_mask].copy()
     result["_match_score"] = score.loc[overall_mask]

notoecd/structure.py CHANGED Viewed

@@ -138,7 +138,11 @@ class Structure():
             print(f"Could not find explanation for {unclean_values}")
         return {i:explanation[i] for i in clean_values}
+    def explain_columns(self, df:pd.DataFrame) -> dict:
+        dim_dict = self.concepts['DIMENSIONS']
+        return {i: dim_dict[i] for i in df.columns if i in dim_dict}
-@lru_cache(maxsize=128)
+@lru_cache(maxsize=64)
 def get_structure(agencyID: str, dataflowID: str) -> Structure:
     return Structure(agencyID, dataflowID)

{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: notoecd
-Version: 0.1.4
+Version: 0.1.5
 Summary: Unofficial library for interacting with the OECD Data Explorer through Python.
 Author-email: Daniel Vegara Balsa <daniel.vegarabalsa@oecd.org>
 License-Expression: MIT
@@ -68,7 +68,7 @@ This returns datasets that mention GDP and regional levels (TL2/TL3). It gives t
 Once a dataset is identified, load its SDMX structure:
-    dataset = 'Gross domestic product - Regions'
+    # Gross domestic product - Regions
     agencyID = 'OECD.CFE.EDS'
     dataflowID = 'DSD_REG_ECO@DF_GDP'
@@ -103,7 +103,7 @@ Keys correspond to SDMX dimensions, values are strings or lists (for multiple va
 Fetch the filtered dataset:
-    df = notoecd.get_df(agency, dataflow, filters)
+    df = notoecd.get_df(agencyID, dataflowID, filters)
     df.head()
 The returned object is a pandas DataFrame containing the requested subset of OECD SDMX data.

notoecd-0.1.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+notoecd/__init__.py,sha256=v397Mb7CzA-dmcZYLK6AiEesjCf3ugEv0keEVFh8bz4,189
+notoecd/calls.py,sha256=wy7ELlMLl6X5bEN7V4Pn-AcV8YYqAUPWzL-9uT-1NzM,3400
+notoecd/datasets.py,sha256=BTDgdhOK6tJrU0fp_7GZQOoTpOf4CRQrqDVkXcvpAcU,3818
+notoecd/structure.py,sha256=ur8kkdCL2zRjCVrw1grtyCRNZ-aqHKRgv9X_a_6qtDQ,7349
+notoecd-0.1.5.dist-info/licenses/LICENSE,sha256=jb9xgeCKfW-VCXFQtYmiM_SZ9tC2zPGtOIVsE5G17W8,1076
+notoecd-0.1.5.dist-info/METADATA,sha256=hnnWK8pgdyajFl71f3hLsS3-QP_vYdapjYnQK-E9YNM,2755
+notoecd-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+notoecd-0.1.5.dist-info/top_level.txt,sha256=GrcbH10OAguGh5dkpzst216N_C-NtZ-QF1nlXiUpeLs,8
+notoecd-0.1.5.dist-info/RECORD,,

notoecd-0.1.4.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-notoecd/__init__.py,sha256=8_cXiMZN0gino3W2Wat06ncVcAR8XpCBgC3Q7vEbHjQ,189
-notoecd/calls.py,sha256=SFM4kerc-K43Yo6oDBCsnvCIpN2Bg0-sHKpRfAujS-o,1496
-notoecd/datasets.py,sha256=c8iz2HzWyCGGQINNnzlHG-kJMqsDKFbDObvK11QZU0Y,3751
-notoecd/structure.py,sha256=sq6HrjNLfK-UWr9Cuqxun_DhHLPdegX7j7pKYcEYcJM,7169
-notoecd-0.1.4.dist-info/licenses/LICENSE,sha256=jb9xgeCKfW-VCXFQtYmiM_SZ9tC2zPGtOIVsE5G17W8,1076
-notoecd-0.1.4.dist-info/METADATA,sha256=hDuKZgO-urzF1ZHaqFIco36OFN435-b8zFf3lkwhs9c,2761
-notoecd-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-notoecd-0.1.4.dist-info/top_level.txt,sha256=GrcbH10OAguGh5dkpzst216N_C-NtZ-QF1nlXiUpeLs,8
-notoecd-0.1.4.dist-info/RECORD,,

{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{notoecd-0.1.4.dist-info → notoecd-0.1.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

notoecd 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

notoecd 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl