PyPI - notoecd - Versions diffs - 0.1.1__tar.gz → 0.1.2__tar.gz - Mend

notoecd 0.1.1 (tar.gz) → 0.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{notoecd-0.1.1 → notoecd-0.1.2}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: notoecd
-Version: 0.1.1
-Summary: Library for interacting with the OECD Data Explorer through Python
+Version: 0.1.2
+Summary: Unofficial library for interacting with the OECD Data Explorer through Python.
 Author-email: Daniel Vegara Balsa <daniel.vegarabalsa@oecd.org>
 License-Expression: MIT
 Project-URL: Homepage, https://github.com/dani-37/notoecd
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.31
 ⚠️ **Unofficial package, not endorsed by the OECD.**
 A lightweight Python interface for exploring OECD SDMX structures and downloading OECD datasets.
 The package provides utilities for:
 - Discovering dataset metadata
@@ -110,4 +111,3 @@ The returned object is a pandas DataFrame containing the requested subset of OEC
 ## Examples
 You can see this full example as a notebook called example.ipynb.

{notoecd-0.1.1 → notoecd-0.1.2}/README.md RENAMED Viewed

@@ -3,6 +3,7 @@
 ⚠️ **Unofficial package, not endorsed by the OECD.**
 A lightweight Python interface for exploring OECD SDMX structures and downloading OECD datasets.
 The package provides utilities for:
 - Discovering dataset metadata
@@ -97,5 +98,4 @@ The returned object is a pandas DataFrame containing the requested subset of OEC
 ## Examples
-You can see this full example as a notebook called example.ipynb.
+You can see this full example as a notebook called example.ipynb.

{notoecd-0.1.1 → notoecd-0.1.2}/notoecd/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 from .calls import get_df
 from .structure import get_structure

{notoecd-0.1.1 → notoecd-0.1.2}/notoecd/datasets.py RENAMED Viewed

@@ -18,43 +18,51 @@ NS = {
 _ws_re = re.compile(r"\s+")
 _tag_re = re.compile(r"<[^>]+>")
-def _clean_text(s: str | None) -> str | None:
+def _clean(s: str | None) -> str | None:
     if s is None: return None
     s = html.unescape(s)
     s = _tag_re.sub("", s)
     s = _ws_re.sub(" ", s).strip()
     return s or None
-headers = {
-    "Accept": "application/vnd.sdmx.structure+xml;version=2.1"
-}
+# Cache
+_datasets: pd.DataFrame | None = None
+def _load_datasets() -> pd.DataFrame:
+    """
+    Loads OECD datasets and keeps them in memory.
+    """
+    global _datasets
+    if _datasets is not None: return _datasets
+    headers = {"Accept": "application/vnd.sdmx.structure+xml;version=2.1"}
+    r = requests.get(url, headers=headers, timeout=30)
+    r.raise_for_status()
+    root = ET.fromstring(r.content)
+    rows = []
+    for df in root.findall(".//structure:Dataflow", NS):
+        dataflow_id = df.attrib.get("id")
+        agency_id = df.attrib.get("agencyID")
+        name_elem = df.find("common:Name[@xml:lang='en']", NS)
+        desc_elem = df.find("common:Description[@xml:lang='en']", NS)
+        name = _clean("".join(name_elem.itertext())) if name_elem is not None else None
+        desc_raw = "".join(desc_elem.itertext()) if desc_elem is not None else None
+        desc = _clean(desc_raw)
+        rows.append(
+            {
+                "dataflowID": dataflow_id,
+                "agencyID": agency_id,
+                "name": name,
+                "description": desc,
+            }
+        )
-r = requests.get(url, headers=headers, timeout=30)
-r.raise_for_status()
-root = ET.fromstring(r.content)
-rows = []
-for df in root.findall(".//structure:Dataflow", NS):
-    dataflow_id = df.attrib.get("id")
-    agency_id = df.attrib.get("agencyID")
-    name_elem = df.find("common:Name[@xml:lang='en']", NS)
-    desc_elem = df.find("common:Description[@xml:lang='en']", NS)
-    name = _clean_text("".join(name_elem.itertext())) if name_elem is not None else None
-    desc_raw = "".join(desc_elem.itertext()) if desc_elem is not None else None
-    desc = _clean_text(desc_raw)
-    rows.append(
-        {
-            "dataflowID": dataflow_id,
-            "agencyID": agency_id,
-            "name": name,
-            "description": desc,
-        }
-    )
-datasets = pd.DataFrame(rows)
+    _datasets = pd.DataFrame(rows)
+    return _datasets
 def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
     """
@@ -66,6 +74,7 @@ def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
     Returns:
         pd.DataFrame: Matching rows.
     """
+    datasets = _load_datasets()
     # Normalize keywords input
     if isinstance(keywords, str): keywords = [keywords]
@@ -78,10 +87,8 @@ def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
     def _normalize_series(s: pd.Series) -> pd.Series:
         s = s.fillna("").astype(str).str.lower()
         return s.map(
-            lambda x: "".join(
-                ch for ch in unicodedata.normalize("NFKD", x)
-                if not unicodedata.combining(ch)
-            )
+            lambda x: "".join(ch for ch in unicodedata.normalize("NFKD", x)
+                if not unicodedata.combining(ch))
         )
     # Combined normalized text for each row

{notoecd-0.1.1 → notoecd-0.1.2}/notoecd.egg-info/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: notoecd
-Version: 0.1.1
-Summary: Library for interacting with the OECD Data Explorer through Python
+Version: 0.1.2
+Summary: Unofficial library for interacting with the OECD Data Explorer through Python.
 Author-email: Daniel Vegara Balsa <daniel.vegarabalsa@oecd.org>
 License-Expression: MIT
 Project-URL: Homepage, https://github.com/dani-37/notoecd
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.31
 ⚠️ **Unofficial package, not endorsed by the OECD.**
 A lightweight Python interface for exploring OECD SDMX structures and downloading OECD datasets.
 The package provides utilities for:
 - Discovering dataset metadata
@@ -110,4 +111,3 @@ The returned object is a pandas DataFrame containing the requested subset of OEC
 ## Examples
 You can see this full example as a notebook called example.ipynb.

{notoecd-0.1.1 → notoecd-0.1.2}/pyproject.toml RENAMED Viewed

@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "notoecd"
-version = "0.1.1"
-description = "Library for interacting with the OECD Data Explorer through Python"
+version = "0.1.2"
+description = "Unofficial library for interacting with the OECD Data Explorer through Python."
 readme = "README.md"
 license = "MIT"
 requires-python = ">=3.10"

{notoecd-0.1.1 → notoecd-0.1.2}/tests/test_datasets.py RENAMED Viewed

@@ -1,6 +1,5 @@
 import importlib
 import requests
-import pandas as pd
 def _fake_dataflow_all_xml() -> bytes:
@@ -44,9 +43,12 @@ class _Resp:
             raise requests.HTTPError(f"HTTP {self.status_code}")
-def test_datasets_dataframe_built_on_import(monkeypatch):
+def test_datasets_lazy_loaded_and_cached_in_memory(monkeypatch):
+    calls = {"n": 0}
     def fake_get(url, *args, **kwargs):
         if url.endswith("/public/rest/dataflow/all"):
+            calls["n"] += 1
             return _Resp(_fake_dataflow_all_xml())
         raise AssertionError(f"Unexpected URL in test_datasets: {url}")
@@ -55,9 +57,18 @@ def test_datasets_dataframe_built_on_import(monkeypatch):
     datasets_mod = importlib.import_module("notoecd.datasets")
     importlib.reload(datasets_mod)
-    assert isinstance(datasets_mod.datasets, pd.DataFrame)
-    assert {"agencyID", "dataflowID", "name", "description"}.issubset(datasets_mod.datasets.columns)
-    assert len(datasets_mod.datasets) == 3
+    # Import should not fetch
+    assert calls["n"] == 0
+    # First search triggers load
+    hits = datasets_mod.search_keywords(["gdp"])
+    assert calls["n"] == 1
+    assert len(hits) == 1
+    # Second search should reuse in-memory cache (no extra fetch)
+    hits2 = datasets_mod.search_keywords(["cafe"])
+    assert calls["n"] == 1
+    assert len(hits2) == 1
 def test_search_keywords_or_and_normalization(monkeypatch):