eolas-data 1.3.0__tar.gz → 1.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eolas_data-1.3.0 → eolas_data-1.3.2}/.github/workflows/publish.yml +18 -0
- eolas_data-1.3.2/.github/workflows/smoke.yml +29 -0
- eolas_data-1.3.2/.github/workflows/test.yml +40 -0
- eolas_data-1.3.2/PKG-INFO +329 -0
- eolas_data-1.3.2/README.md +281 -0
- eolas_data-1.3.2/eolas_data/__init__.py +35 -0
- eolas_data-1.3.2/eolas_data/_dataset_names.py +3095 -0
- eolas_data-1.3.2/eolas_data/cdc.py +233 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/cli.py +680 -41
- eolas_data-1.3.2/eolas_data/client.py +2139 -0
- eolas_data-1.3.2/eolas_data/console.py +25 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/dataset.py +17 -3
- eolas_data-1.3.2/eolas_data/exceptions.py +94 -0
- eolas_data-1.3.2/eolas_data/library.py +260 -0
- eolas_data-1.3.2/eolas_data/meta.py +92 -0
- eolas_data-1.3.2/eolas_data/rows.py +36 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/schedule.py +19 -8
- eolas_data-1.3.2/eolas_data/search.py +133 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/pyproject.toml +14 -1
- eolas_data-1.3.2/scripts/preflight.sh +43 -0
- eolas_data-1.3.2/tests/test_as_arrow.py +51 -0
- eolas_data-1.3.2/tests/test_cdc_roundtrip.py +166 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/tests/test_cli.py +178 -3
- {eolas_data-1.3.0 → eolas_data-1.3.2}/tests/test_client.py +261 -20
- eolas_data-1.3.2/tests/test_keyring.py +266 -0
- eolas_data-1.3.2/tests/test_library.py +235 -0
- eolas_data-1.3.2/tests/test_meta.py +75 -0
- eolas_data-1.3.2/tests/test_progress.py +447 -0
- eolas_data-1.3.2/tests/test_rows.py +24 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/tests/test_schedule.py +10 -1
- eolas_data-1.3.2/tests/test_search.py +77 -0
- eolas_data-1.3.2/tests/test_smoke_live.py +56 -0
- eolas_data-1.3.2/tests/test_sync_bulk.py +440 -0
- eolas_data-1.3.2/tests/test_sync_changes.py +791 -0
- eolas_data-1.3.0/PKG-INFO +0 -216
- eolas_data-1.3.0/README.md +0 -174
- eolas_data-1.3.0/eolas_data/__init__.py +0 -16
- eolas_data-1.3.0/eolas_data/_dataset_names.py +0 -1455
- eolas_data-1.3.0/eolas_data/client.py +0 -339
- eolas_data-1.3.0/eolas_data/exceptions.py +0 -20
- {eolas_data-1.3.0 → eolas_data-1.3.2}/.github/workflows/catalog-drift.yml +0 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/.gitignore +0 -0
- {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/_regen_names.py +0 -0
|
@@ -6,7 +6,25 @@ on:
|
|
|
6
6
|
- "v*"
|
|
7
7
|
|
|
8
8
|
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
env:
|
|
12
|
+
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
|
|
20
|
+
- name: Install package + dev deps
|
|
21
|
+
run: pip install -e ".[dev,secure]"
|
|
22
|
+
|
|
23
|
+
- name: pytest
|
|
24
|
+
run: pytest -q -m "not integration"
|
|
25
|
+
|
|
9
26
|
publish:
|
|
27
|
+
needs: test
|
|
10
28
|
runs-on: ubuntu-latest
|
|
11
29
|
environment: pypi
|
|
12
30
|
permissions:
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Live API smoke
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
- cron: "0 8 * * 1" # Mondays 08:00 UTC
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
smoke:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.12"
|
|
17
|
+
|
|
18
|
+
- name: Install package + dev deps
|
|
19
|
+
run: pip install -e ".[dev]"
|
|
20
|
+
|
|
21
|
+
- name: Live smoke tests
|
|
22
|
+
env:
|
|
23
|
+
EOLAS_API_KEY: ${{ secrets.EOLAS_API_KEY }}
|
|
24
|
+
run: |
|
|
25
|
+
if [ -z "$EOLAS_API_KEY" ]; then
|
|
26
|
+
echo "EOLAS_API_KEY secret not configured — skipping smoke tests."
|
|
27
|
+
exit 0
|
|
28
|
+
fi
|
|
29
|
+
pytest -q -m integration tests/test_smoke_live.py
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.12", "3.13"]
|
|
15
|
+
env:
|
|
16
|
+
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install package + dev + secure (install-tests the keyring extra)
|
|
25
|
+
run: pip install -e ".[dev,secure]"
|
|
26
|
+
|
|
27
|
+
- name: pytest (unit suite)
|
|
28
|
+
run: pytest -q -m "not integration"
|
|
29
|
+
|
|
30
|
+
coverage:
|
|
31
|
+
runs-on: ubuntu-latest
|
|
32
|
+
env:
|
|
33
|
+
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/checkout@v4
|
|
36
|
+
- uses: actions/setup-python@v5
|
|
37
|
+
with:
|
|
38
|
+
python-version: "3.12"
|
|
39
|
+
- run: pip install -e ".[dev,secure]" pytest-cov
|
|
40
|
+
- run: pytest -q -m "not integration" --cov=eolas_data --cov-report=term-missing
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: eolas-data
|
|
3
|
+
Version: 1.3.2
|
|
4
|
+
Summary: Python client for the eolas.fyi statistical data API (New Zealand and OECD; Australian sources planned)
|
|
5
|
+
Project-URL: Homepage, https://eolas.fyi
|
|
6
|
+
Project-URL: Documentation, https://docs.eolas.fyi/
|
|
7
|
+
Project-URL: Repository, https://github.com/phildonovan/eolas-data
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/phildonovan/eolas-data/issues
|
|
9
|
+
Author-email: Virtus Solutions <phil@virtus-solutions.io>
|
|
10
|
+
License: MIT
|
|
11
|
+
Keywords: api,australia,economics,eolas,new-zealand,statistics
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: pandas>=1.5
|
|
24
|
+
Requires-Dist: pyarrow>=14
|
|
25
|
+
Requires-Dist: requests>=2.28
|
|
26
|
+
Requires-Dist: rich>=13
|
|
27
|
+
Requires-Dist: tqdm>=4.60
|
|
28
|
+
Requires-Dist: typer>=0.12
|
|
29
|
+
Provides-Extra: cli
|
|
30
|
+
Requires-Dist: rich>=13; extra == 'cli'
|
|
31
|
+
Requires-Dist: typer>=0.12; extra == 'cli'
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: geopandas>=0.14; extra == 'dev'
|
|
34
|
+
Requires-Dist: pandas; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
36
|
+
Requires-Dist: responses; extra == 'dev'
|
|
37
|
+
Requires-Dist: rich>=13; extra == 'dev'
|
|
38
|
+
Requires-Dist: shapely>=2.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: typer>=0.12; extra == 'dev'
|
|
40
|
+
Provides-Extra: geo
|
|
41
|
+
Requires-Dist: geopandas>=0.14; extra == 'geo'
|
|
42
|
+
Requires-Dist: shapely>=2.0; extra == 'geo'
|
|
43
|
+
Provides-Extra: polars
|
|
44
|
+
Requires-Dist: polars>=0.20; extra == 'polars'
|
|
45
|
+
Provides-Extra: secure
|
|
46
|
+
Requires-Dist: keyring>=23.0; extra == 'secure'
|
|
47
|
+
Description-Content-Type: text/markdown
|
|
48
|
+
|
|
49
|
+
# eolas-data
|
|
50
|
+
|
|
51
|
+
Python client for the [eolas.fyi](https://eolas.fyi) statistical data API — 1,400+ official New Zealand statistical & geospatial datasets, plus OECD data for international comparisons, served as tidy `pandas` DataFrames (or `polars` / `geopandas` if you prefer).
|
|
52
|
+
|
|
53
|
+
_Coverage is New Zealand + OECD today. Australian sources are on the roadmap — not yet available; OECD data already includes Australia (and other OECD members) for cross-country comparisons._
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install eolas-data
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Quickstart
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from eolas_data import Client
|
|
63
|
+
|
|
64
|
+
client = Client("your_api_key") # or set EOLAS_API_KEY in env
|
|
65
|
+
|
|
66
|
+
# CPI index (monthly, RBNZ M1) — the usual Treasury/analyst choice
|
|
67
|
+
cpi = client.rbnz("rbnz_m1_prices", start="2020-01-01")
|
|
68
|
+
|
|
69
|
+
# OECD macro indicators (quarterly YoY % — not CPI index levels)
|
|
70
|
+
inflation = client.oecd("nz_cpi", start="2020-01-01")
|
|
71
|
+
gdp = client.oecd("nz_gdp_growth")
|
|
72
|
+
|
|
73
|
+
# Discovery
|
|
74
|
+
all_datasets = client.list()
|
|
75
|
+
nz_only = client.list("Stats NZ")
|
|
76
|
+
client.search("cpi") # expands aliases; surfaces rbnz_m1_prices before nz_cpi
|
|
77
|
+
meta = client.info("rbnz_m1_prices")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Get an API key at <https://eolas.fyi/signup>. Free plan is 10 requests/month; Pro ($49/month) is unlimited.
|
|
81
|
+
|
|
82
|
+
## Quick setup (workstation)
|
|
83
|
+
|
|
84
|
+
Two one-off commands make every future session frictionless:
|
|
85
|
+
|
|
86
|
+
**1. Save your API key** to the OS keyring (macOS Keychain / Windows Credential Manager / Linux Secret Service) so `Client()` finds it automatically — no env var, no pasting:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install 'eolas-data[secure]' # adds the keyring package
|
|
90
|
+
eolas auth save-key # interactive prompt
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from eolas_data import Client
|
|
95
|
+
client = Client() # key read from OS keyring automatically
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**2. Set a library directory** so downloaded bulk files land somewhere permanent instead of the transient `~/.cache/eolas/` OS cache:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
eolas library set ~/eolas-library # writes to ~/.eolas/config.json
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Or set the env var instead (useful for CI / Docker):
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
export EOLAS_LIBRARY=~/eolas-library
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
After setting the library, `client.get_local("nz_parcels")` will use `~/eolas-library/` automatically.
|
|
111
|
+
|
|
112
|
+
The keyring slot and config file are shared with the R `eolas` client — a key saved from Python is immediately readable from R and vice versa (see the [R client README](https://github.com/phildonovan/eolas-r)).
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Command-line interface
|
|
117
|
+
|
|
118
|
+
`pip install 'eolas-data[cli]'` adds an `eolas` command for browsing, fetching, and
|
|
119
|
+
scheduling — useful for shell scripts, cron jobs, and AI-agent workflows. Rich
|
|
120
|
+
tables by default; pass `--json` for newline-delimited JSON in scripts.
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# one-time setup (OS keyring — recommended)
|
|
124
|
+
pip install 'eolas-data[secure]'
|
|
125
|
+
eolas auth save-key
|
|
126
|
+
|
|
127
|
+
# or config file (no extra install)
|
|
128
|
+
eolas auth set-key
|
|
129
|
+
eolas health
|
|
130
|
+
|
|
131
|
+
# discover
|
|
132
|
+
eolas datasets list --source "Stats NZ"
|
|
133
|
+
eolas datasets list --search cpi # table + CPI guidance note
|
|
134
|
+
eolas datasets list --search cpi --json | jq '.[].name'
|
|
135
|
+
eolas datasets info rbnz_m1_prices
|
|
136
|
+
eolas datasets preview rbnz_m1_prices --limit 5
|
|
137
|
+
|
|
138
|
+
# fetch (verb matches the Python lib's client.get())
|
|
139
|
+
eolas get rbnz_m1_prices --format csv > cpi.csv
|
|
140
|
+
eolas get nz_cpi --start 2020-01-01 --format json | jq '.[].value' # OECD YoY %
|
|
141
|
+
eolas get nz_meshblock_2023 --format parquet --out nz_meshblock_2023.parquet
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Scheduling
|
|
145
|
+
|
|
146
|
+
Set up recurring fetches without touching crontab/Task Scheduler syntax. Works
|
|
147
|
+
on Linux, macOS (cron), and Windows (Task Scheduler).
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
eolas schedule add nz_cpi --daily --out ~/data/cpi.csv
|
|
151
|
+
eolas schedule add nz_gdp_growth --weekly --out ~/data/gdp.csv
|
|
152
|
+
eolas schedule add rbnz_b1_exchange_rates_monthly --cron "0 */6 * * *" --out ~/data/fx.csv # POSIX only
|
|
153
|
+
|
|
154
|
+
eolas schedule list
|
|
155
|
+
eolas schedule remove nz_cpi
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Daily is the default. A pre-flight check refuses to install a schedule unless
|
|
159
|
+
your API key is configured (otherwise the job would fail silently forever).
|
|
160
|
+
|
|
161
|
+
### Integrations (Enterprise plan)
|
|
162
|
+
|
|
163
|
+
Generate ready-to-run connector configs for popular data-pipeline tools — eolas
|
|
164
|
+
becomes a one-command source for Meltano, Fivetran, or Azure Data Factory.
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
eolas integrate meltano --datasets nz_cpi,nz_gdp_growth --output ./my-pipeline/
|
|
168
|
+
eolas integrate fivetran --datasets nz_cpi
|
|
169
|
+
eolas integrate azure-data-factory --datasets nz_cpi,nz_gdp_growth
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
The generated directory has everything needed to plug into your destination
|
|
173
|
+
warehouse: `meltano.yml`, `fivetran.yml`, or ADF JSON resources, plus a `README.md`
|
|
174
|
+
walking through the rest of the setup. Non-Enterprise users see a clear
|
|
175
|
+
upgrade pointer; the gating lives server-side so the capability is bypass-proof.
|
|
176
|
+
|
|
177
|
+
### Exit codes
|
|
178
|
+
|
|
179
|
+
Distinct exit codes per error class, for shell scripts and agents:
|
|
180
|
+
|
|
181
|
+
| Code | Meaning |
|
|
182
|
+
|---|---|
|
|
183
|
+
| `0` | Success |
|
|
184
|
+
| `1` | Generic error |
|
|
185
|
+
| `2` | Auth (`AuthenticationError`, including Enterprise-gate 403) |
|
|
186
|
+
| `3` | Rate limit hit |
|
|
187
|
+
| `4` | Dataset / resource not found |
|
|
188
|
+
| `5` | Other API error |
|
|
189
|
+
| `64` | Bad usage (mirrors `sysexits.h`) |
|
|
190
|
+
|
|
191
|
+
## Performance (Arrow)
|
|
192
|
+
|
|
193
|
+
`client.get()` transparently negotiates **Apache Arrow** over the wire — same
|
|
194
|
+
`DataFrame` back, typically **5–10× faster end-to-end** on large pulls, with
|
|
195
|
+
an automatic JSON fallback. No setup: `pyarrow` ships with `eolas-data`, so
|
|
196
|
+
this is on by default; `format=` (`"json"`/`"csv"`) is only for the rare case
|
|
197
|
+
you want the raw text payload.
|
|
198
|
+
|
|
199
|
+
For a columnar file (CLI), use `--format parquet --out FILE`; via the REST
|
|
200
|
+
API directly, `?format=parquet`. Full benchmark: [docs.eolas.fyi → Python
|
|
201
|
+
reference → Performance](https://docs.eolas.fyi/python/reference/).
|
|
202
|
+
|
|
203
|
+
## Bulk downloads — use `get_local()` for whole datasets
|
|
204
|
+
|
|
205
|
+
`client.get()` hits the live `/data` endpoint (good for slices and small pulls). For whole datasets — especially large or geospatial layers — use `get_local()`. It syncs a CDN-cached Parquet/GeoParquet file to your library directory and reads from disk on subsequent calls.
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
# Whole-dataset path: nz_parcels from CDN-cached GeoParquet (seconds, not a 15-min Iceberg scan)
|
|
209
|
+
gdf = client.get_local("nz_parcels") # geopandas.GeoDataFrame when [geo] is installed
|
|
210
|
+
df = client.get_local("nz_cpi") # tidy DataFrame from cached Parquet
|
|
211
|
+
|
|
212
|
+
# Live path: date slices, row limits, licence-restricted sources (e.g. OECD)
|
|
213
|
+
df = client.get("nz_cpi", start="2020-01-01")
|
|
214
|
+
df = client.get("nz_cpi", limit=100)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Use `get_local()` when you need to control `cache_dir`, `format`, or `freshness`:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
# Explicit cache+sync with extra options
|
|
221
|
+
gdf = client.get_local("nz_parcels")
|
|
222
|
+
gdf = client.get_local("nz_parcels", cache_dir="/data/eolas", freshness="monthly")
|
|
223
|
+
df = client.get_local("nz_cpi", format="csv_gz")
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
For advanced control over the sync lifecycle (sidecar tracking, atomic replace), use `sync_bulk()` directly. For one-shot bytes-or-path downloads, use `download_bulk()`:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
r = client.sync_bulk("nz_cpi", path="nz_cpi.parquet")
|
|
230
|
+
# r.status ∈ {"downloaded", "unchanged", "updated"}; r.bytes_downloaded == 0 when unchanged.
|
|
231
|
+
path = client.download_bulk("treasury_fiscal_spending", path="t.parquet")
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**Progress bars:** `download_bulk`, `sync_bulk`, and `get_local` all show a `tqdm` progress bar automatically in interactive terminals and VSCode notebooks, so 1+ GB files are never silent. Pass `progress=False` to suppress in scripts, or set `EOLAS_NO_PROGRESS=1` in the environment for a CI-wide escape hatch. The `--no-progress` flag does the same from the CLI.
|
|
235
|
+
|
|
236
|
+
CLI mirror: `eolas download <name>` for one-shot, `eolas sync <name> [--watch hourly]` for an incremental check. Full docs: [docs.eolas.fyi/bulk-downloads/](https://docs.eolas.fyi/bulk-downloads/).
|
|
237
|
+
|
|
238
|
+
## Geospatial
|
|
239
|
+
|
|
240
|
+
Datasets with a `geometry_wkt` column auto-convert to `geopandas.GeoDataFrame` if `geopandas` is installed:
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
pip install 'eolas-data[geo]'
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
gdf = client.get("nz_addresses") # GeoDataFrame
|
|
248
|
+
df = client.get("nz_addresses", as_geo=False) # plain DataFrame, WKT preserved
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Working with large geo datasets
|
|
252
|
+
|
|
253
|
+
The 5.4M-row `linz.nz_parcels` table allocates ~10 GB when materialised as a GeoDataFrame. Pass `as_arrow=True` to skip all shapely allocation and get a zero-copy `pyarrow.Table` instead — geometry stays as Arrow buffers until you need it:
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
# Zero-copy Arrow table — no shapely allocation
|
|
257
|
+
tbl = client.linz("nz_parcels", as_arrow=True)
|
|
258
|
+
|
|
259
|
+
# Filter before materialising — dramatically cheaper than loading the full GeoDataFrame
|
|
260
|
+
import duckdb
|
|
261
|
+
result = duckdb.sql("""
|
|
262
|
+
SELECT parcel_id, geometry_wkt
|
|
263
|
+
FROM tbl
|
|
264
|
+
WHERE ST_Within(ST_GeomFromText(geometry_wkt),
|
|
265
|
+
ST_GeomFromText('POLYGON((174.7 -41.3, 174.8 -41.3, 174.8 -41.4, 174.7 -41.4, 174.7 -41.3))'))
|
|
266
|
+
""").df()
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
`as_arrow=True` works on all datasets (geo or non-geo), all routing modes (live, cached, auto), and all source helpers. It cannot be combined with `as_geo=True`.
|
|
270
|
+
|
|
271
|
+
## Polars
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
pip install 'eolas-data[polars]'
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
df = client.get("nz_cpi", engine="polars")
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## Plotting
|
|
282
|
+
|
|
283
|
+
`Dataset` is a `pandas.DataFrame` subclass — use matplotlib / seaborn / plotly
|
|
284
|
+
directly. No bundled plot helper, because there's no universal "right" plot for
|
|
285
|
+
a tidy dataset (single-series time series vs. wide multi-measure vs. WKT
|
|
286
|
+
geometry all need different code).
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
import matplotlib.pyplot as plt
|
|
290
|
+
|
|
291
|
+
df = client.oecd("nz_cpi")
|
|
292
|
+
df.plot(x="date", y="value")
|
|
293
|
+
plt.show()
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
## Type stubs
|
|
297
|
+
|
|
298
|
+
Dataset names are exposed as a `Literal` so IDEs autocomplete the catalog:
|
|
299
|
+
|
|
300
|
+
```python
|
|
301
|
+
from eolas_data import Client
|
|
302
|
+
|
|
303
|
+
client = Client()
|
|
304
|
+
client.get("nz_") # autocomplete shows nz_cpi, nz_gdp_growth, ...
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
The list is regenerated from the live API at release time. Passing a name not in the snapshot still works at runtime — the type hint just won't autocomplete it. Catalog snapshot date is exposed as `eolas_data._dataset_names.CATALOG_SNAPSHOT_DATE`.
|
|
308
|
+
|
|
309
|
+
## Testing
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
# unit tests (mocked HTTP — no API key needed)
|
|
313
|
+
pytest -q -m "not integration"
|
|
314
|
+
|
|
315
|
+
# live smoke (requires EOLAS_API_KEY)
|
|
316
|
+
EOLAS_API_KEY=vs_... pytest -q -m integration tests/test_smoke_live.py
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
CI runs the unit suite on Python 3.10, 3.12, and 3.13 on every push to `master` and on every pull request. A weekly workflow (also triggerable manually) runs live smoke tests when `EOLAS_API_KEY` is configured as a repository secret, and skips them otherwise.
|
|
320
|
+
|
|
321
|
+
## Releasing
|
|
322
|
+
|
|
323
|
+
See [`docs/clients.md`](https://github.com/phildonovan/eolas/blob/master/docs/clients.md) in the eolas data repo for the tagged-release flow and PyPI token rotation.
|
|
324
|
+
|
|
325
|
+
Before each release: `python -m eolas_data._regen_names` to refresh the dataset name stubs from the live API, commit the change, then tag and push.
|
|
326
|
+
|
|
327
|
+
## License
|
|
328
|
+
|
|
329
|
+
MIT
|