notoecd 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
notoecd/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "0.1.2"
2
2
 
3
3
  from .calls import get_df
4
4
  from .structure import get_structure
notoecd/datasets.py CHANGED
@@ -18,43 +18,51 @@ NS = {
18
18
  _ws_re = re.compile(r"\s+")
19
19
  _tag_re = re.compile(r"<[^>]+>")
20
20
 
21
- def _clean_text(s: str | None) -> str | None:
21
+ def _clean(s: str | None) -> str | None:
22
22
  if s is None: return None
23
23
  s = html.unescape(s)
24
24
  s = _tag_re.sub("", s)
25
25
  s = _ws_re.sub(" ", s).strip()
26
26
  return s or None
27
27
 
28
- headers = {
29
- "Accept": "application/vnd.sdmx.structure+xml;version=2.1"
30
- }
28
+ # Cache
29
+ _datasets: pd.DataFrame | None = None
30
+
31
+ def _load_datasets() -> pd.DataFrame:
32
+ """
33
+ Loads OECD datasets and keeps them in memory.
34
+ """
35
+ global _datasets
36
+ if _datasets is not None: return _datasets
37
+
38
+ headers = {"Accept": "application/vnd.sdmx.structure+xml;version=2.1"}
39
+ r = requests.get(url, headers=headers, timeout=30)
40
+ r.raise_for_status()
41
+ root = ET.fromstring(r.content)
42
+
43
+ rows = []
44
+ for df in root.findall(".//structure:Dataflow", NS):
45
+ dataflow_id = df.attrib.get("id")
46
+ agency_id = df.attrib.get("agencyID")
47
+
48
+ name_elem = df.find("common:Name[@xml:lang='en']", NS)
49
+ desc_elem = df.find("common:Description[@xml:lang='en']", NS)
50
+
51
+ name = _clean("".join(name_elem.itertext())) if name_elem is not None else None
52
+ desc_raw = "".join(desc_elem.itertext()) if desc_elem is not None else None
53
+ desc = _clean(desc_raw)
54
+
55
+ rows.append(
56
+ {
57
+ "dataflowID": dataflow_id,
58
+ "agencyID": agency_id,
59
+ "name": name,
60
+ "description": desc,
61
+ }
62
+ )
31
63
 
32
- r = requests.get(url, headers=headers, timeout=30)
33
- r.raise_for_status()
34
- root = ET.fromstring(r.content)
35
-
36
- rows = []
37
- for df in root.findall(".//structure:Dataflow", NS):
38
- dataflow_id = df.attrib.get("id")
39
- agency_id = df.attrib.get("agencyID")
40
-
41
- name_elem = df.find("common:Name[@xml:lang='en']", NS)
42
- desc_elem = df.find("common:Description[@xml:lang='en']", NS)
43
-
44
- name = _clean_text("".join(name_elem.itertext())) if name_elem is not None else None
45
- desc_raw = "".join(desc_elem.itertext()) if desc_elem is not None else None
46
- desc = _clean_text(desc_raw)
47
-
48
- rows.append(
49
- {
50
- "dataflowID": dataflow_id,
51
- "agencyID": agency_id,
52
- "name": name,
53
- "description": desc,
54
- }
55
- )
56
-
57
- datasets = pd.DataFrame(rows)
64
+ _datasets = pd.DataFrame(rows)
65
+ return _datasets
58
66
 
59
67
  def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
60
68
  """
@@ -66,6 +74,7 @@ def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
66
74
  Returns:
67
75
  pd.DataFrame: Matching rows.
68
76
  """
77
+ datasets = _load_datasets()
69
78
 
70
79
  # Normalize keywords input
71
80
  if isinstance(keywords, str): keywords = [keywords]
@@ -78,10 +87,8 @@ def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
78
87
  def _normalize_series(s: pd.Series) -> pd.Series:
79
88
  s = s.fillna("").astype(str).str.lower()
80
89
  return s.map(
81
- lambda x: "".join(
82
- ch for ch in unicodedata.normalize("NFKD", x)
83
- if not unicodedata.combining(ch)
84
- )
90
+ lambda x: "".join(ch for ch in unicodedata.normalize("NFKD", x)
91
+ if not unicodedata.combining(ch))
85
92
  )
86
93
 
87
94
  # Combined normalized text for each row
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: notoecd
3
- Version: 0.1.1
4
- Summary: Library for interacting with the OECD Data Explorer through Python
3
+ Version: 0.1.2
4
+ Summary: Unofficial library for interacting with the OECD Data Explorer through Python.
5
5
  Author-email: Daniel Vegara Balsa <daniel.vegarabalsa@oecd.org>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/dani-37/notoecd
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.31
15
15
  ⚠️ **Unofficial package, not endorsed by the OECD.**
16
16
 
17
17
  A lightweight Python interface for exploring OECD SDMX structures and downloading OECD datasets.
18
+
18
19
  The package provides utilities for:
19
20
 
20
21
  - Discovering dataset metadata
@@ -110,4 +111,3 @@ The returned object is a pandas DataFrame containing the requested subset of OEC
110
111
  ## Examples
111
112
 
112
113
  You can see this full example as a notebook called example.ipynb.
113
-
@@ -0,0 +1,8 @@
1
+ notoecd/__init__.py,sha256=J3-DA__Z9hHzp6eOszRMFh4VE6tk1YtSNigW7el4qDM,189
2
+ notoecd/calls.py,sha256=CDx-1wJ4myXtoihIfTvjHoXBvIwylvv7AdN_UL5gnF4,1492
3
+ notoecd/datasets.py,sha256=a1L45vie6q4cjNXAued2gQ4oE4Fbpk25kdhDTexMuAI,4158
4
+ notoecd/structure.py,sha256=sq6HrjNLfK-UWr9Cuqxun_DhHLPdegX7j7pKYcEYcJM,7169
5
+ notoecd-0.1.2.dist-info/METADATA,sha256=kt6OjNXdlGpn5K-_WQm-z-GLKbbPLgF3mq0C7YSGNho,3192
6
+ notoecd-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ notoecd-0.1.2.dist-info/top_level.txt,sha256=GrcbH10OAguGh5dkpzst216N_C-NtZ-QF1nlXiUpeLs,8
8
+ notoecd-0.1.2.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- notoecd/__init__.py,sha256=134t-qMBmEQN_mtpGOSfQ5zr128wWT68J0e5RIBzN0g,189
2
- notoecd/calls.py,sha256=CDx-1wJ4myXtoihIfTvjHoXBvIwylvv7AdN_UL5gnF4,1492
3
- notoecd/datasets.py,sha256=nxLNP0L28mXTKYpdR6BQN5Tk6CKoQS7dygm8twnIVSg,3845
4
- notoecd/structure.py,sha256=sq6HrjNLfK-UWr9Cuqxun_DhHLPdegX7j7pKYcEYcJM,7169
5
- notoecd-0.1.1.dist-info/METADATA,sha256=YphZ-il5WvyxK0u1w3cI_vdH2uUdCLaWd3ms981SovI,3180
6
- notoecd-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- notoecd-0.1.1.dist-info/top_level.txt,sha256=GrcbH10OAguGh5dkpzst216N_C-NtZ-QF1nlXiUpeLs,8
8
- notoecd-0.1.1.dist-info/RECORD,,