opensdmx 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.3
2
+ Name: opensdmx
3
+ Version: 0.2.1
4
+ Summary: Simple Python interface to any SDMX 2.1 REST API (Eurostat, ISTAT, and more)
5
+ Author: Andrea Borruso
6
+ Author-email: Andrea Borruso <aborruso@gmail.com>
7
+ License: MIT
8
+ Requires-Dist: duckdb>=1.4.4
9
+ Requires-Dist: httpx>=0.28.1
10
+ Requires-Dist: inquirerpy>=0.3.4
11
+ Requires-Dist: lxml>=6.0.2
12
+ Requires-Dist: numpy>=2.4.2
13
+ Requires-Dist: ollama>=0.6.1
14
+ Requires-Dist: plotnine>=0.15.3
15
+ Requires-Dist: polars>=1.38.1
16
+ Requires-Dist: pyarrow>=23.0.1
17
+ Requires-Dist: questionary>=2.1.1
18
+ Requires-Dist: rich>=14.3.3
19
+ Requires-Dist: tenacity>=9.1.4
20
+ Requires-Dist: typer>=0.24.1
21
+ Requires-Dist: chatlas[google]>=0.7
22
+ Requires-Python: >=3.12
23
+ Description-Content-Type: text/markdown
24
+
25
+ # opensdmx
26
+
27
+ Simple Python CLI and library for any SDMX 2.1 REST API. Default provider: **Eurostat**. Built-in support for ISTAT, OECD, ECB, World Bank, and more.
28
+
29
+ > **Best used with AI.** opensdmx works well on its own, but it shines when driven by an AI agent: the CLI is designed to be composed, queried, and orchestrated step by step. For a guided, interactive experience — dataset discovery, schema exploration, filter selection, and data retrieval — pair it with the [`sdmx-explorer`](skills/sdmx-explorer/SKILL.md) Agent Skill included in this repo.
30
+
31
+ ## Installation
32
+
33
+ **As a CLI tool** (recommended — available system-wide):
34
+
35
+ ```bash
36
+ uv tool install opensdmx
37
+ ```
38
+
39
+ **As a library** (for use in Python projects):
40
+
41
+ ```bash
42
+ uv add opensdmx
43
+ # or
44
+ pip install opensdmx
45
+ ```
46
+
47
+ ## CLI quick start
48
+
49
+ ```bash
50
+ opensdmx search "unemployment"
51
+ opensdmx info une_rt_m
52
+ opensdmx constraints une_rt_m geo
53
+ opensdmx get une_rt_m --freq M --geo IT --sex T --out data.csv
54
+ ```
55
+
56
+ ## Python quick start
57
+
58
+ ```python
59
+ import opensdmx
60
+
61
+ # Default provider: Eurostat
62
+ datasets = opensdmx.all_available()
63
+ print(datasets.head())
64
+
65
+ # Search by keyword
66
+ results = opensdmx.search_dataset("unemployment")
67
+
68
+ # One-liner retrieval (Eurostat default)
69
+ data = opensdmx.fetch("une_rt_m", freq="M", geo="IT", sex="T", age="TOTAL")
70
+
71
+ # Switch provider
72
+ opensdmx.set_provider("istat")
73
+ opensdmx.set_provider("oecd")
74
+ opensdmx.set_provider("ecb")
75
+ ```
76
+
77
+ ## Providers
78
+
79
+ ```python
80
+ import opensdmx
81
+
82
+ # Built-in presets
83
+ opensdmx.set_provider("eurostat") # default
84
+ opensdmx.set_provider("istat")
85
+ opensdmx.set_provider("oecd")
86
+ opensdmx.set_provider("ecb")
87
+ opensdmx.set_provider("worldbank")
88
+
89
+ # Custom provider
90
+ opensdmx.set_provider("https://mysdmx.org/rest", agency_id="XYZ", rate_limit=1.0)
91
+
92
+ # Check active provider
93
+ opensdmx.get_provider() # returns dict with base_url, agency_id, rate_limit, language
94
+ ```
95
+
96
+ ## Python API
97
+
98
+ | Function | Description |
99
+ |---|---|
100
+ | `set_provider(name_or_url, ...)` | Set active provider: a built-in preset (`'eurostat'`, `'istat'`, `'oecd'`, `'ecb'`, `'worldbank'`) or a custom base URL |
101
+ | `get_provider()` | Return active provider config dict |
102
+ | `all_available()` | List all datasets → Polars DataFrame |
103
+ | `search_dataset(keyword)` | Search by keyword in description |
104
+ | `load_dataset(id)` | Create a dataset object (dict) |
105
+ | `print_dataset(ds)` | Print dataset summary |
106
+ | `dimensions_info(ds)` | Dimension metadata → Polars DataFrame |
107
+ | `get_dimension_values(ds, dim)` | Codelist values for a dimension |
108
+ | `get_available_values(ds)` | Values actually present in the data (via `availableconstraint`) |
109
+ | `set_filters(ds, **kwargs)` | Set dimension filters |
110
+ | `reset_filters(ds)` | Reset all filters to `"."` (all) |
111
+ | `get_data(ds, ...)` | Retrieve data → Polars DataFrame |
112
+ | `fetch(id, ..., **filters)` | One-liner: load dataset + set filters + get data |
113
+ | `set_timeout(seconds)` | Get/set API timeout (default: 300 s) |
114
+ | `parse_time_period(series)` | Convert SDMX time strings to dates |
115
+
116
+ ### `get_data` and `fetch` parameters
117
+
118
+ | Parameter | Type | Description |
119
+ |---|---|---|
120
+ | `start_period` | `str` | Start date: `"2020"`, `"2020-Q1"`, `"2020-01"` |
121
+ | `end_period` | `str` | End date (same formats) |
122
+ | `last_n_observations` | `int` | Return only last N observations per series |
123
+ | `first_n_observations` | `int` | Return only first N observations per series |
124
+
125
+ ## Example: Italy Unemployment Rate
126
+
127
+ ```python
128
+ import opensdmx
129
+ from plotnine import ggplot, aes, geom_line, geom_point, labs, theme_minimal, scale_x_date
130
+
131
+ # Eurostat monthly unemployment by sex and age
132
+ ds = opensdmx.load_dataset("une_rt_m")
133
+ ds = opensdmx.set_filters(ds, freq="M", geo="IT", sex="T", age="TOTAL", s_adj="SA", unit="PC_ACT")
134
+ data = opensdmx.get_data(ds, start_period="2015", last_n_observations=60)
135
+
136
+ import polars as pl
137
+ data = data.with_columns(pl.col("OBS_VALUE").cast(pl.Float64))
138
+
139
+ plot = (
140
+ ggplot(data.to_pandas(), aes(x="TIME_PERIOD", y="OBS_VALUE"))
141
+ + geom_line(color="#1f77b4", size=1)
142
+ + geom_point(color="#1f77b4", size=0.8)
143
+ + labs(title="Italy Unemployment Rate (Monthly)", x="Year", y="Rate (%)")
144
+ + scale_x_date(date_breaks="2 years", date_labels="%Y")
145
+ + theme_minimal()
146
+ )
147
+ plot.save("unemployment.png", dpi=150, width=10, height=5)
148
+ ```
149
+
150
+ ## CLI
151
+
152
+ ### Commands
153
+
154
+ All commands accept `--provider` (`-p`) to select the provider.
155
+
156
+ | Command | Description |
157
+ |---|---|
158
+ | `opensdmx search <keyword> [-p provider]` | Keyword search in dataset descriptions |
159
+ | `opensdmx search --semantic <query> [-p provider]` | Semantic search (requires running `opensdmx embed` first) |
160
+ | `opensdmx embed [-p provider]` | Build semantic embeddings cache via Ollama |
161
+ | `opensdmx info <id> [-p provider]` | Show dataset metadata and dimensions |
162
+ | `opensdmx values <id> <dim> [-p provider]` | Show codelist values for a dimension |
163
+ | `opensdmx constraints <id> [dim] [-p provider]` | Show values actually present in the dataflow (via `availableconstraint`) |
164
+ | `opensdmx get <id> [--DIM VALUE] [--start-period P] [--end-period P] [--last-n N] [--first-n N] [--out file] [-p provider]` | Download data (CSV/parquet/JSON) |
165
+ | `opensdmx plot <id> [--DIM VALUE] [--out file] [-p provider]` | Plot data as line chart |
166
+ | `opensdmx blacklist [-p provider]` | List and remove datasets from the unavailability blacklist |
167
+
168
+ ### Examples
169
+
170
+ ```bash
171
+ # Eurostat (default)
172
+ opensdmx search "unemployment"
173
+ opensdmx info une_rt_m
174
+ opensdmx constraints une_rt_m
175
+ opensdmx constraints une_rt_m geo
176
+ opensdmx get une_rt_m --freq M --geo IT --out data.csv
177
+
178
+ # Other providers
179
+ opensdmx search "disoccupazione" --provider istat
180
+ opensdmx get 151_929 --provider istat --FREQ A --REF_AREA IT --out data.csv
181
+ opensdmx search "GDP" --provider oecd
182
+ opensdmx search "inflation" --provider ecb
183
+
184
+ ```
185
+
186
+ ### Semantic search setup
187
+
188
+ Requires [Ollama](https://ollama.com) with the `nomic-embed-text-v2-moe` model:
189
+
190
+ ```bash
191
+ ollama pull nomic-embed-text-v2-moe
192
+ opensdmx embed # build embeddings for default provider (eurostat)
193
+ opensdmx embed -p istat # build embeddings for ISTAT
194
+ opensdmx search --semantic "unemployment"
195
+ ```
196
+
197
+ ### Caching
198
+
199
+ Cache is namespaced per provider under `~/.cache/opensdmx/{AGENCY_ID}/`.
200
+
201
+ | File | Content | TTL |
202
+ |---|---|---|
203
+ | `~/.cache/opensdmx/ESTAT/dataflows.parquet` | Eurostat catalog | 24h |
204
+ | `~/.cache/opensdmx/ESTAT/cache.db` | Dimensions, codelists, constraints (SQLite) | 7 days |
205
+ | `~/.cache/opensdmx/IT1/dataflows.parquet` | ISTAT catalog | 24h |
206
+ | `~/.cache/opensdmx/IT1/cache.db` | ISTAT SQLite cache | 7 days |
207
+
208
+ ## Timeout
209
+
210
+ ```python
211
+ opensdmx.set_timeout() # get current timeout (default: 300s)
212
+ opensdmx.set_timeout(600) # set to 10 minutes
213
+ ```
214
+
215
+ ## Acknowledgements
216
+
217
+ Inspired by [istatR](https://github.com/jfulponi/istatR) by [@jfulponi](https://github.com/jfulponi) and [istatapi](https://github.com/Attol8/istatapi) by [@Attol8](https://github.com/Attol8).
218
+
219
+ ## License
220
+
221
+ MIT License — Copyright (c) 2026 Andrea Borruso
@@ -0,0 +1,197 @@
1
+ # opensdmx
2
+
3
+ Simple Python CLI and library for any SDMX 2.1 REST API. Default provider: **Eurostat**. Built-in support for ISTAT, OECD, ECB, World Bank, and more.
4
+
5
+ > **Best used with AI.** opensdmx works well on its own, but it shines when driven by an AI agent: the CLI is designed to be composed, queried, and orchestrated step by step. For a guided, interactive experience — dataset discovery, schema exploration, filter selection, and data retrieval — pair it with the [`sdmx-explorer`](skills/sdmx-explorer/SKILL.md) Agent Skill included in this repo.
6
+
7
+ ## Installation
8
+
9
+ **As a CLI tool** (recommended — available system-wide):
10
+
11
+ ```bash
12
+ uv tool install opensdmx
13
+ ```
14
+
15
+ **As a library** (for use in Python projects):
16
+
17
+ ```bash
18
+ uv add opensdmx
19
+ # or
20
+ pip install opensdmx
21
+ ```
22
+
23
+ ## CLI quick start
24
+
25
+ ```bash
26
+ opensdmx search "unemployment"
27
+ opensdmx info une_rt_m
28
+ opensdmx constraints une_rt_m geo
29
+ opensdmx get une_rt_m --freq M --geo IT --sex T --out data.csv
30
+ ```
31
+
32
+ ## Python quick start
33
+
34
+ ```python
35
+ import opensdmx
36
+
37
+ # Default provider: Eurostat
38
+ datasets = opensdmx.all_available()
39
+ print(datasets.head())
40
+
41
+ # Search by keyword
42
+ results = opensdmx.search_dataset("unemployment")
43
+
44
+ # One-liner retrieval (Eurostat default)
45
+ data = opensdmx.fetch("une_rt_m", freq="M", geo="IT", sex="T", age="TOTAL")
46
+
47
+ # Switch provider
48
+ opensdmx.set_provider("istat")
49
+ opensdmx.set_provider("oecd")
50
+ opensdmx.set_provider("ecb")
51
+ ```
52
+
53
+ ## Providers
54
+
55
+ ```python
56
+ import opensdmx
57
+
58
+ # Built-in presets
59
+ opensdmx.set_provider("eurostat") # default
60
+ opensdmx.set_provider("istat")
61
+ opensdmx.set_provider("oecd")
62
+ opensdmx.set_provider("ecb")
63
+ opensdmx.set_provider("worldbank")
64
+
65
+ # Custom provider
66
+ opensdmx.set_provider("https://mysdmx.org/rest", agency_id="XYZ", rate_limit=1.0)
67
+
68
+ # Check active provider
69
+ opensdmx.get_provider() # returns dict with base_url, agency_id, rate_limit, language
70
+ ```
71
+
72
+ ## Python API
73
+
74
+ | Function | Description |
75
+ |---|---|
76
+ | `set_provider(name_or_url, ...)` | Set active provider: a built-in preset (`'eurostat'`, `'istat'`, `'oecd'`, `'ecb'`, `'worldbank'`) or a custom base URL |
77
+ | `get_provider()` | Return active provider config dict |
78
+ | `all_available()` | List all datasets → Polars DataFrame |
79
+ | `search_dataset(keyword)` | Search by keyword in description |
80
+ | `load_dataset(id)` | Create a dataset object (dict) |
81
+ | `print_dataset(ds)` | Print dataset summary |
82
+ | `dimensions_info(ds)` | Dimension metadata → Polars DataFrame |
83
+ | `get_dimension_values(ds, dim)` | Codelist values for a dimension |
84
+ | `get_available_values(ds)` | Values actually present in the data (via `availableconstraint`) |
85
+ | `set_filters(ds, **kwargs)` | Set dimension filters |
86
+ | `reset_filters(ds)` | Reset all filters to `"."` (all) |
87
+ | `get_data(ds, ...)` | Retrieve data → Polars DataFrame |
88
+ | `fetch(id, ..., **filters)` | One-liner: load dataset + set filters + get data |
89
+ | `set_timeout(seconds)` | Get/set API timeout (default: 300 s) |
90
+ | `parse_time_period(series)` | Convert SDMX time strings to dates |
91
+
92
+ ### `get_data` and `fetch` parameters
93
+
94
+ | Parameter | Type | Description |
95
+ |---|---|---|
96
+ | `start_period` | `str` | Start date: `"2020"`, `"2020-Q1"`, `"2020-01"` |
97
+ | `end_period` | `str` | End date (same formats) |
98
+ | `last_n_observations` | `int` | Return only last N observations per series |
99
+ | `first_n_observations` | `int` | Return only first N observations per series |
100
+
101
+ ## Example: Italy Unemployment Rate
102
+
103
+ ```python
104
+ import opensdmx
105
+ from plotnine import ggplot, aes, geom_line, geom_point, labs, theme_minimal, scale_x_date
106
+
107
+ # Eurostat monthly unemployment by sex and age
108
+ ds = opensdmx.load_dataset("une_rt_m")
109
+ ds = opensdmx.set_filters(ds, freq="M", geo="IT", sex="T", age="TOTAL", s_adj="SA", unit="PC_ACT")
110
+ data = opensdmx.get_data(ds, start_period="2015", last_n_observations=60)
111
+
112
+ import polars as pl
113
+ data = data.with_columns(pl.col("OBS_VALUE").cast(pl.Float64))
114
+
115
+ plot = (
116
+ ggplot(data.to_pandas(), aes(x="TIME_PERIOD", y="OBS_VALUE"))
117
+ + geom_line(color="#1f77b4", size=1)
118
+ + geom_point(color="#1f77b4", size=0.8)
119
+ + labs(title="Italy Unemployment Rate (Monthly)", x="Year", y="Rate (%)")
120
+ + scale_x_date(date_breaks="2 years", date_labels="%Y")
121
+ + theme_minimal()
122
+ )
123
+ plot.save("unemployment.png", dpi=150, width=10, height=5)
124
+ ```
125
+
126
+ ## CLI
127
+
128
+ ### Commands
129
+
130
+ All commands accept `--provider` (`-p`) to select the provider.
131
+
132
+ | Command | Description |
133
+ |---|---|
134
+ | `opensdmx search <keyword> [-p provider]` | Keyword search in dataset descriptions |
135
+ | `opensdmx search --semantic <query> [-p provider]` | Semantic search (requires running `opensdmx embed` first) |
136
+ | `opensdmx embed [-p provider]` | Build semantic embeddings cache via Ollama |
137
+ | `opensdmx info <id> [-p provider]` | Show dataset metadata and dimensions |
138
+ | `opensdmx values <id> <dim> [-p provider]` | Show codelist values for a dimension |
139
+ | `opensdmx constraints <id> [dim] [-p provider]` | Show values actually present in the dataflow (via `availableconstraint`) |
140
+ | `opensdmx get <id> [--DIM VALUE] [--start-period P] [--end-period P] [--last-n N] [--first-n N] [--out file] [-p provider]` | Download data (CSV/parquet/JSON) |
141
+ | `opensdmx plot <id> [--DIM VALUE] [--out file] [-p provider]` | Plot data as line chart |
142
+ | `opensdmx blacklist [-p provider]` | List and remove datasets from the unavailability blacklist |
143
+
144
+ ### Examples
145
+
146
+ ```bash
147
+ # Eurostat (default)
148
+ opensdmx search "unemployment"
149
+ opensdmx info une_rt_m
150
+ opensdmx constraints une_rt_m
151
+ opensdmx constraints une_rt_m geo
152
+ opensdmx get une_rt_m --freq M --geo IT --out data.csv
153
+
154
+ # Other providers
155
+ opensdmx search "disoccupazione" --provider istat
156
+ opensdmx get 151_929 --provider istat --FREQ A --REF_AREA IT --out data.csv
157
+ opensdmx search "GDP" --provider oecd
158
+ opensdmx search "inflation" --provider ecb
159
+
160
+ ```
161
+
162
+ ### Semantic search setup
163
+
164
+ Requires [Ollama](https://ollama.com) with the `nomic-embed-text-v2-moe` model:
165
+
166
+ ```bash
167
+ ollama pull nomic-embed-text-v2-moe
168
+ opensdmx embed # build embeddings for default provider (eurostat)
169
+ opensdmx embed -p istat # build embeddings for ISTAT
170
+ opensdmx search --semantic "unemployment"
171
+ ```
172
+
173
+ ### Caching
174
+
175
+ Cache is namespaced per provider under `~/.cache/opensdmx/{AGENCY_ID}/`.
176
+
177
+ | File | Content | TTL |
178
+ |---|---|---|
179
+ | `~/.cache/opensdmx/ESTAT/dataflows.parquet` | Eurostat catalog | 24h |
180
+ | `~/.cache/opensdmx/ESTAT/cache.db` | Dimensions, codelists, constraints (SQLite) | 7 days |
181
+ | `~/.cache/opensdmx/IT1/dataflows.parquet` | ISTAT catalog | 24h |
182
+ | `~/.cache/opensdmx/IT1/cache.db` | ISTAT SQLite cache | 7 days |
183
+
184
+ ## Timeout
185
+
186
+ ```python
187
+ opensdmx.set_timeout() # get current timeout (default: 300s)
188
+ opensdmx.set_timeout(600) # set to 10 minutes
189
+ ```
190
+
191
+ ## Acknowledgements
192
+
193
+ Inspired by [istatR](https://github.com/jfulponi/istatR) by [@jfulponi](https://github.com/jfulponi) and [istatapi](https://github.com/Attol8/istatapi) by [@Attol8](https://github.com/Attol8).
194
+
195
+ ## License
196
+
197
+ MIT License — Copyright (c) 2026 Andrea Borruso
@@ -0,0 +1,33 @@
1
+ [project]
2
+ name = "opensdmx"
3
+ version = "0.2.1"
4
+ description = "Simple Python interface to any SDMX 2.1 REST API (Eurostat, ISTAT, and more)"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Andrea Borruso", email = "aborruso@gmail.com" }
8
+ ]
9
+ license = { text = "MIT" }
10
+ requires-python = ">=3.12"
11
+ dependencies = [
12
+ "duckdb>=1.4.4",
13
+ "httpx>=0.28.1",
14
+ "inquirerpy>=0.3.4",
15
+ "lxml>=6.0.2",
16
+ "numpy>=2.4.2",
17
+ "ollama>=0.6.1",
18
+ "plotnine>=0.15.3",
19
+ "polars>=1.38.1",
20
+ "pyarrow>=23.0.1",
21
+ "questionary>=2.1.1",
22
+ "rich>=14.3.3",
23
+ "tenacity>=9.1.4",
24
+ "typer>=0.24.1",
25
+ "chatlas[google]>=0.7",
26
+ ]
27
+
28
+ [project.scripts]
29
+ opensdmx = "opensdmx:main"
30
+
31
+ [build-system]
32
+ requires = ["uv_build>=0.9.7,<0.10.0"]
33
+ build-backend = "uv_build"
@@ -0,0 +1,36 @@
1
+ """opensdmx — Python interface to any SDMX 2.1 REST API."""
2
+
3
+ from .base import get_provider, set_provider, set_timeout
4
+ from .discovery import (
5
+ ConstraintsUnavailable,
6
+ all_available,
7
+ dimensions_info,
8
+ get_available_values,
9
+ get_dimension_values,
10
+ load_dataset,
11
+ print_dataset,
12
+ reset_filters,
13
+ search_dataset,
14
+ set_filters,
15
+ )
16
+ from .retrieval import fetch, get_data, parse_time_period
17
+ from .cli import main
18
+
19
+ __all__ = [
20
+ "ConstraintsUnavailable",
21
+ "set_provider",
22
+ "get_provider",
23
+ "all_available",
24
+ "search_dataset",
25
+ "load_dataset",
26
+ "print_dataset",
27
+ "dimensions_info",
28
+ "get_dimension_values",
29
+ "get_available_values",
30
+ "set_filters",
31
+ "reset_filters",
32
+ "get_data",
33
+ "fetch",
34
+ "set_timeout",
35
+ "parse_time_period",
36
+ ]