notoecd 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notoecd/__init__.py +1 -1
- notoecd/datasets.py +41 -34
- {notoecd-0.1.1.dist-info → notoecd-0.1.2.dist-info}/METADATA +3 -3
- notoecd-0.1.2.dist-info/RECORD +8 -0
- notoecd-0.1.1.dist-info/RECORD +0 -8
- {notoecd-0.1.1.dist-info → notoecd-0.1.2.dist-info}/WHEEL +0 -0
- {notoecd-0.1.1.dist-info → notoecd-0.1.2.dist-info}/top_level.txt +0 -0
notoecd/__init__.py
CHANGED
notoecd/datasets.py
CHANGED
|
@@ -18,43 +18,51 @@ NS = {
|
|
|
18
18
|
_ws_re = re.compile(r"\s+")
|
|
19
19
|
_tag_re = re.compile(r"<[^>]+>")
|
|
20
20
|
|
|
21
|
-
def _clean_text(s: str | None) -> str | None:
|
|
21
|
+
def _clean(s: str | None) -> str | None:
|
|
22
22
|
if s is None: return None
|
|
23
23
|
s = html.unescape(s)
|
|
24
24
|
s = _tag_re.sub("", s)
|
|
25
25
|
s = _ws_re.sub(" ", s).strip()
|
|
26
26
|
return s or None
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
# Cache
|
|
29
|
+
_datasets: pd.DataFrame | None = None
|
|
30
|
+
|
|
31
|
+
def _load_datasets() -> pd.DataFrame:
|
|
32
|
+
"""
|
|
33
|
+
Loads OECD datasets and keeps them in memory.
|
|
34
|
+
"""
|
|
35
|
+
global _datasets
|
|
36
|
+
if _datasets is not None: return _datasets
|
|
37
|
+
|
|
38
|
+
headers = {"Accept": "application/vnd.sdmx.structure+xml;version=2.1"}
|
|
39
|
+
r = requests.get(url, headers=headers, timeout=30)
|
|
40
|
+
r.raise_for_status()
|
|
41
|
+
root = ET.fromstring(r.content)
|
|
42
|
+
|
|
43
|
+
rows = []
|
|
44
|
+
for df in root.findall(".//structure:Dataflow", NS):
|
|
45
|
+
dataflow_id = df.attrib.get("id")
|
|
46
|
+
agency_id = df.attrib.get("agencyID")
|
|
47
|
+
|
|
48
|
+
name_elem = df.find("common:Name[@xml:lang='en']", NS)
|
|
49
|
+
desc_elem = df.find("common:Description[@xml:lang='en']", NS)
|
|
50
|
+
|
|
51
|
+
name = _clean("".join(name_elem.itertext())) if name_elem is not None else None
|
|
52
|
+
desc_raw = "".join(desc_elem.itertext()) if desc_elem is not None else None
|
|
53
|
+
desc = _clean(desc_raw)
|
|
54
|
+
|
|
55
|
+
rows.append(
|
|
56
|
+
{
|
|
57
|
+
"dataflowID": dataflow_id,
|
|
58
|
+
"agencyID": agency_id,
|
|
59
|
+
"name": name,
|
|
60
|
+
"description": desc,
|
|
61
|
+
}
|
|
62
|
+
)
|
|
31
63
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
root = ET.fromstring(r.content)
|
|
35
|
-
|
|
36
|
-
rows = []
|
|
37
|
-
for df in root.findall(".//structure:Dataflow", NS):
|
|
38
|
-
dataflow_id = df.attrib.get("id")
|
|
39
|
-
agency_id = df.attrib.get("agencyID")
|
|
40
|
-
|
|
41
|
-
name_elem = df.find("common:Name[@xml:lang='en']", NS)
|
|
42
|
-
desc_elem = df.find("common:Description[@xml:lang='en']", NS)
|
|
43
|
-
|
|
44
|
-
name = _clean_text("".join(name_elem.itertext())) if name_elem is not None else None
|
|
45
|
-
desc_raw = "".join(desc_elem.itertext()) if desc_elem is not None else None
|
|
46
|
-
desc = _clean_text(desc_raw)
|
|
47
|
-
|
|
48
|
-
rows.append(
|
|
49
|
-
{
|
|
50
|
-
"dataflowID": dataflow_id,
|
|
51
|
-
"agencyID": agency_id,
|
|
52
|
-
"name": name,
|
|
53
|
-
"description": desc,
|
|
54
|
-
}
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
datasets = pd.DataFrame(rows)
|
|
64
|
+
_datasets = pd.DataFrame(rows)
|
|
65
|
+
return _datasets
|
|
58
66
|
|
|
59
67
|
def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
|
|
60
68
|
"""
|
|
@@ -66,6 +74,7 @@ def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
|
|
|
66
74
|
Returns:
|
|
67
75
|
pd.DataFrame: Matching rows.
|
|
68
76
|
"""
|
|
77
|
+
datasets = _load_datasets()
|
|
69
78
|
|
|
70
79
|
# Normalize keywords input
|
|
71
80
|
if isinstance(keywords, str): keywords = [keywords]
|
|
@@ -78,10 +87,8 @@ def search_keywords(keywords: Union[str, List[str]]) -> pd.DataFrame:
|
|
|
78
87
|
def _normalize_series(s: pd.Series) -> pd.Series:
|
|
79
88
|
s = s.fillna("").astype(str).str.lower()
|
|
80
89
|
return s.map(
|
|
81
|
-
lambda x: "".join(
|
|
82
|
-
|
|
83
|
-
if not unicodedata.combining(ch)
|
|
84
|
-
)
|
|
90
|
+
lambda x: "".join(ch for ch in unicodedata.normalize("NFKD", x)
|
|
91
|
+
if not unicodedata.combining(ch))
|
|
85
92
|
)
|
|
86
93
|
|
|
87
94
|
# Combined normalized text for each row
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: notoecd
|
|
3
|
-
Version: 0.1.1
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Unofficial library for interacting with the OECD Data Explorer through Python.
|
|
5
5
|
Author-email: Daniel Vegara Balsa <daniel.vegarabalsa@oecd.org>
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/dani-37/notoecd
|
|
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.31
|
|
|
15
15
|
⚠️ **Unofficial package, not endorsed by the OECD.**
|
|
16
16
|
|
|
17
17
|
A lightweight Python interface for exploring OECD SDMX structures and downloading OECD datasets.
|
|
18
|
+
|
|
18
19
|
The package provides utilities for:
|
|
19
20
|
|
|
20
21
|
- Discovering dataset metadata
|
|
@@ -110,4 +111,3 @@ The returned object is a pandas DataFrame containing the requested subset of OEC
|
|
|
110
111
|
## Examples
|
|
111
112
|
|
|
112
113
|
You can see this full example as a notebook called example.ipynb.
|
|
113
|
-
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
notoecd/__init__.py,sha256=J3-DA__Z9hHzp6eOszRMFh4VE6tk1YtSNigW7el4qDM,189
|
|
2
|
+
notoecd/calls.py,sha256=CDx-1wJ4myXtoihIfTvjHoXBvIwylvv7AdN_UL5gnF4,1492
|
|
3
|
+
notoecd/datasets.py,sha256=a1L45vie6q4cjNXAued2gQ4oE4Fbpk25kdhDTexMuAI,4158
|
|
4
|
+
notoecd/structure.py,sha256=sq6HrjNLfK-UWr9Cuqxun_DhHLPdegX7j7pKYcEYcJM,7169
|
|
5
|
+
notoecd-0.1.2.dist-info/METADATA,sha256=kt6OjNXdlGpn5K-_WQm-z-GLKbbPLgF3mq0C7YSGNho,3192
|
|
6
|
+
notoecd-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
notoecd-0.1.2.dist-info/top_level.txt,sha256=GrcbH10OAguGh5dkpzst216N_C-NtZ-QF1nlXiUpeLs,8
|
|
8
|
+
notoecd-0.1.2.dist-info/RECORD,,
|
notoecd-0.1.1.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
notoecd/__init__.py,sha256=134t-qMBmEQN_mtpGOSfQ5zr128wWT68J0e5RIBzN0g,189
|
|
2
|
-
notoecd/calls.py,sha256=CDx-1wJ4myXtoihIfTvjHoXBvIwylvv7AdN_UL5gnF4,1492
|
|
3
|
-
notoecd/datasets.py,sha256=nxLNP0L28mXTKYpdR6BQN5Tk6CKoQS7dygm8twnIVSg,3845
|
|
4
|
-
notoecd/structure.py,sha256=sq6HrjNLfK-UWr9Cuqxun_DhHLPdegX7j7pKYcEYcJM,7169
|
|
5
|
-
notoecd-0.1.1.dist-info/METADATA,sha256=YphZ-il5WvyxK0u1w3cI_vdH2uUdCLaWd3ms981SovI,3180
|
|
6
|
-
notoecd-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
-
notoecd-0.1.1.dist-info/top_level.txt,sha256=GrcbH10OAguGh5dkpzst216N_C-NtZ-QF1nlXiUpeLs,8
|
|
8
|
-
notoecd-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|