eolas-data 1.3.0__tar.gz → 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {eolas_data-1.3.0 → eolas_data-1.3.2}/.github/workflows/publish.yml +18 -0
  2. eolas_data-1.3.2/.github/workflows/smoke.yml +29 -0
  3. eolas_data-1.3.2/.github/workflows/test.yml +40 -0
  4. eolas_data-1.3.2/PKG-INFO +329 -0
  5. eolas_data-1.3.2/README.md +281 -0
  6. eolas_data-1.3.2/eolas_data/__init__.py +35 -0
  7. eolas_data-1.3.2/eolas_data/_dataset_names.py +3095 -0
  8. eolas_data-1.3.2/eolas_data/cdc.py +233 -0
  9. {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/cli.py +680 -41
  10. eolas_data-1.3.2/eolas_data/client.py +2139 -0
  11. eolas_data-1.3.2/eolas_data/console.py +25 -0
  12. {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/dataset.py +17 -3
  13. eolas_data-1.3.2/eolas_data/exceptions.py +94 -0
  14. eolas_data-1.3.2/eolas_data/library.py +260 -0
  15. eolas_data-1.3.2/eolas_data/meta.py +92 -0
  16. eolas_data-1.3.2/eolas_data/rows.py +36 -0
  17. {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/schedule.py +19 -8
  18. eolas_data-1.3.2/eolas_data/search.py +133 -0
  19. {eolas_data-1.3.0 → eolas_data-1.3.2}/pyproject.toml +14 -1
  20. eolas_data-1.3.2/scripts/preflight.sh +43 -0
  21. eolas_data-1.3.2/tests/test_as_arrow.py +51 -0
  22. eolas_data-1.3.2/tests/test_cdc_roundtrip.py +166 -0
  23. {eolas_data-1.3.0 → eolas_data-1.3.2}/tests/test_cli.py +178 -3
  24. {eolas_data-1.3.0 → eolas_data-1.3.2}/tests/test_client.py +261 -20
  25. eolas_data-1.3.2/tests/test_keyring.py +266 -0
  26. eolas_data-1.3.2/tests/test_library.py +235 -0
  27. eolas_data-1.3.2/tests/test_meta.py +75 -0
  28. eolas_data-1.3.2/tests/test_progress.py +447 -0
  29. eolas_data-1.3.2/tests/test_rows.py +24 -0
  30. {eolas_data-1.3.0 → eolas_data-1.3.2}/tests/test_schedule.py +10 -1
  31. eolas_data-1.3.2/tests/test_search.py +77 -0
  32. eolas_data-1.3.2/tests/test_smoke_live.py +56 -0
  33. eolas_data-1.3.2/tests/test_sync_bulk.py +440 -0
  34. eolas_data-1.3.2/tests/test_sync_changes.py +791 -0
  35. eolas_data-1.3.0/PKG-INFO +0 -216
  36. eolas_data-1.3.0/README.md +0 -174
  37. eolas_data-1.3.0/eolas_data/__init__.py +0 -16
  38. eolas_data-1.3.0/eolas_data/_dataset_names.py +0 -1455
  39. eolas_data-1.3.0/eolas_data/client.py +0 -339
  40. eolas_data-1.3.0/eolas_data/exceptions.py +0 -20
  41. {eolas_data-1.3.0 → eolas_data-1.3.2}/.github/workflows/catalog-drift.yml +0 -0
  42. {eolas_data-1.3.0 → eolas_data-1.3.2}/.gitignore +0 -0
  43. {eolas_data-1.3.0 → eolas_data-1.3.2}/eolas_data/_regen_names.py +0 -0
@@ -6,7 +6,25 @@ on:
6
6
  - "v*"
7
7
 
8
8
  jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ env:
12
+ PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+
20
+ - name: Install package + dev deps
21
+ run: pip install -e ".[dev,secure]"
22
+
23
+ - name: pytest
24
+ run: pytest -q -m "not integration"
25
+
9
26
  publish:
27
+ needs: test
10
28
  runs-on: ubuntu-latest
11
29
  environment: pypi
12
30
  permissions:
@@ -0,0 +1,29 @@
1
+ name: Live API smoke
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 8 * * 1" # Mondays 08:00 UTC
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ smoke:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+
18
+ - name: Install package + dev deps
19
+ run: pip install -e ".[dev]"
20
+
21
+ - name: Live smoke tests
22
+ env:
23
+ EOLAS_API_KEY: ${{ secrets.EOLAS_API_KEY }}
24
+ run: |
25
+ if [ -z "$EOLAS_API_KEY" ]; then
26
+ echo "EOLAS_API_KEY secret not configured — skipping smoke tests."
27
+ exit 0
28
+ fi
29
+ pytest -q -m integration tests/test_smoke_live.py
@@ -0,0 +1,40 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.10", "3.12", "3.13"]
15
+ env:
16
+ PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install package + dev + secure (install-tests the keyring extra)
25
+ run: pip install -e ".[dev,secure]"
26
+
27
+ - name: pytest (unit suite)
28
+ run: pytest -q -m "not integration"
29
+
30
+ coverage:
31
+ runs-on: ubuntu-latest
32
+ env:
33
+ PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+ - uses: actions/setup-python@v5
37
+ with:
38
+ python-version: "3.12"
39
+ - run: pip install -e ".[dev,secure]" pytest-cov
40
+ - run: pytest -q -m "not integration" --cov=eolas_data --cov-report=term-missing
@@ -0,0 +1,329 @@
1
+ Metadata-Version: 2.4
2
+ Name: eolas-data
3
+ Version: 1.3.2
4
+ Summary: Python client for the eolas.fyi statistical data API (NZ, Australia, OECD)
5
+ Project-URL: Homepage, https://eolas.fyi
6
+ Project-URL: Documentation, https://docs.eolas.fyi/
7
+ Project-URL: Repository, https://github.com/phildonovan/eolas-data
8
+ Project-URL: Bug Tracker, https://github.com/phildonovan/eolas-data/issues
9
+ Author-email: Virtus Solutions <phil@virtus-solutions.io>
10
+ License: MIT
11
+ Keywords: api,australia,economics,eolas,new-zealand,statistics
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Scientific/Engineering
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: pandas>=1.5
24
+ Requires-Dist: pyarrow>=14
25
+ Requires-Dist: requests>=2.28
26
+ Requires-Dist: rich>=13
27
+ Requires-Dist: tqdm>=4.60
28
+ Requires-Dist: typer>=0.12
29
+ Provides-Extra: cli
30
+ Requires-Dist: rich>=13; extra == 'cli'
31
+ Requires-Dist: typer>=0.12; extra == 'cli'
32
+ Provides-Extra: dev
33
+ Requires-Dist: geopandas>=0.14; extra == 'dev'
34
+ Requires-Dist: pandas; extra == 'dev'
35
+ Requires-Dist: pytest; extra == 'dev'
36
+ Requires-Dist: responses; extra == 'dev'
37
+ Requires-Dist: rich>=13; extra == 'dev'
38
+ Requires-Dist: shapely>=2.0; extra == 'dev'
39
+ Requires-Dist: typer>=0.12; extra == 'dev'
40
+ Provides-Extra: geo
41
+ Requires-Dist: geopandas>=0.14; extra == 'geo'
42
+ Requires-Dist: shapely>=2.0; extra == 'geo'
43
+ Provides-Extra: polars
44
+ Requires-Dist: polars>=0.20; extra == 'polars'
45
+ Provides-Extra: secure
46
+ Requires-Dist: keyring>=23.0; extra == 'secure'
47
+ Description-Content-Type: text/markdown
48
+
49
+ # eolas-data
50
+
51
+ Python client for the [eolas.fyi](https://eolas.fyi) statistical data API — 1,400+ official New Zealand statistical & geospatial datasets, plus OECD data for international comparisons, served as tidy `pandas` DataFrames (or `polars` / `geopandas` if you prefer).
52
+
53
+ _Coverage is New Zealand + OECD today. Australian sources are on the roadmap — not yet available; OECD data already includes Australia (and other OECD members) for cross-country comparisons._
54
+
55
+ ```bash
56
+ pip install eolas-data
57
+ ```
58
+
59
+ ## Quickstart
60
+
61
+ ```python
62
+ from eolas_data import Client
63
+
64
+ client = Client("your_api_key") # or set EOLAS_API_KEY in env
65
+
66
+ # CPI index (monthly, RBNZ M1) — the usual Treasury/analyst choice
67
+ cpi = client.rbnz("rbnz_m1_prices", start="2020-01-01")
68
+
69
+ # OECD macro indicators (quarterly YoY % — not CPI index levels)
70
+ inflation = client.oecd("nz_cpi", start="2020-01-01")
71
+ gdp = client.oecd("nz_gdp_growth")
72
+
73
+ # Discovery
74
+ all_datasets = client.list()
75
+ nz_only = client.list("Stats NZ")
76
+ client.search("cpi") # expands aliases; surfaces rbnz_m1_prices before nz_cpi
77
+ meta = client.info("rbnz_m1_prices")
78
+ ```
79
+
80
+ Get an API key at <https://eolas.fyi/signup>. Free plan is 10 requests/month; Pro ($49/month) is unlimited.
81
+
82
+ ## Quick setup (workstation)
83
+
84
+ Two one-off commands make every future session frictionless:
85
+
86
+ **1. Save your API key** to the OS keyring (macOS Keychain / Windows Credential Manager / Linux Secret Service) so `Client()` finds it automatically — no env var, no pasting:
87
+
88
+ ```bash
89
+ pip install 'eolas-data[secure]' # adds the keyring package
90
+ eolas auth save-key # interactive prompt
91
+ ```
92
+
93
+ ```python
94
+ from eolas_data import Client
95
+ client = Client() # key read from OS keyring automatically
96
+ ```
97
+
98
+ **2. Set a library directory** so downloaded bulk files land somewhere permanent instead of the transient `~/.cache/eolas/` OS cache:
99
+
100
+ ```bash
101
+ eolas library set ~/eolas-library # writes to ~/.eolas/config.json
102
+ ```
103
+
104
+ Or set the env var instead (useful for CI / Docker):
105
+
106
+ ```bash
107
+ export EOLAS_LIBRARY=~/eolas-library
108
+ ```
109
+
110
+ After setting the library, `client.get_local("nz_parcels")` will use `~/eolas-library/` automatically.
111
+
112
+ The keyring slot and config file are shared with the R `eolas` client — a key saved from Python is immediately readable from R and vice versa (see the [R client README](https://github.com/phildonovan/eolas-r)).
113
+
114
+ ---
115
+
116
+ ## Command-line interface
117
+
118
+ `pip install eolas-data[cli]` adds an `eolas` command for browsing, fetching, and
119
+ scheduling — useful for shell scripts, cron jobs, and AI-agent workflows. Rich
120
+ tables by default; pass `--json` for machine-readable JSON output in scripts.
121
+
122
+ ```bash
123
+ # one-time setup (OS keyring — recommended)
124
+ pip install 'eolas-data[secure]'
125
+ eolas auth save-key
126
+
127
+ # or config file (no extra install)
128
+ eolas auth set-key
129
+ eolas health
130
+
131
+ # discover
132
+ eolas datasets list --source "Stats NZ"
133
+ eolas datasets list --search cpi # table + CPI guidance note
134
+ eolas datasets list --search cpi --json | jq '.[].name'
135
+ eolas datasets info rbnz_m1_prices
136
+ eolas datasets preview rbnz_m1_prices --limit 5
137
+
138
+ # fetch (verb matches the Python lib's client.get())
139
+ eolas get rbnz_m1_prices --format csv > cpi.csv
140
+ eolas get nz_cpi --start 2020-01-01 --format json | jq '.[].value' # OECD YoY %
141
+ eolas get nz_meshblock_2023 --format parquet --out meshblocks.parquet
142
+ ```
143
+
144
+ ### Scheduling
145
+
146
+ Set up recurring fetches without touching crontab/Task Scheduler syntax. Works
147
+ on Linux, macOS (cron), and Windows (Task Scheduler).
148
+
149
+ ```bash
150
+ eolas schedule add nz_cpi --daily --out ~/data/cpi.csv
151
+ eolas schedule add nz_gdp_growth --weekly --out ~/data/gdp.csv
152
+ eolas schedule add rbnz_b1_exchange_rates_monthly --cron "0 */6 * * *" --out ~/data/fx.csv # POSIX only
153
+
154
+ eolas schedule list
155
+ eolas schedule remove nz_cpi
156
+ ```
157
+
158
+ Daily is the default. A pre-flight check refuses to install a schedule unless
159
+ your API key is configured (otherwise the job would fail silently forever).
160
+
161
+ ### Integrations (Enterprise plan)
162
+
163
+ Generate ready-to-run connector configs for popular data-pipeline tools — eolas
164
+ becomes a one-command source for Meltano, Fivetran, or Azure Data Factory.
165
+
166
+ ```bash
167
+ eolas integrate meltano --datasets nz_cpi,nz_gdp_growth --output ./my-pipeline/
168
+ eolas integrate fivetran --datasets nz_cpi
169
+ eolas integrate azure-data-factory --datasets nz_cpi,nz_gdp_growth
170
+ ```
171
+
172
+ The generated directory has everything needed to plug into your destination
173
+ warehouse: `meltano.yml`, `fivetran.yml`, or ADF JSON resources, plus a `README.md`
174
+ walking through the rest of the setup. Non-Enterprise users see a clear
175
+ upgrade pointer; the gating lives server-side so the capability is bypass-proof.
176
+
177
+ ### Exit codes
178
+
179
+ Distinct exit codes per error class, for shell scripts and agents:
180
+
181
+ | Code | Meaning |
182
+ |---|---|
183
+ | `0` | Success |
184
+ | `1` | Generic error |
185
+ | `2` | Auth (`AuthenticationError`, including Enterprise-gate 403) |
186
+ | `3` | Rate limit hit |
187
+ | `4` | Dataset / resource not found |
188
+ | `5` | Other API error |
189
+ | `64` | Bad usage (mirrors `sysexits.h`) |
190
+
191
+ ## Performance (Arrow)
192
+
193
+ `client.get()` transparently negotiates **Apache Arrow** over the wire — same
194
+ `DataFrame` back, typically **5–10× faster end-to-end** on large pulls, with
195
+ an automatic JSON fallback. No setup: `pyarrow` ships with `eolas-data`, so
196
+ this is on by default; `format=` (`"json"`/`"csv"`) is only for the rare case
197
+ you want the raw text payload.
198
+
199
+ For a columnar file (CLI), use `--format parquet --out FILE`; via the REST
200
+ API directly, `?format=parquet`. Full benchmark: [docs.eolas.fyi → Python
201
+ reference → Performance](https://docs.eolas.fyi/python/reference/).
202
+
203
+ ## Bulk downloads — use `get_local()` for whole datasets
204
+
205
+ `client.get()` hits the live `/data` endpoint (good for slices and small pulls). For whole datasets — especially large or geospatial layers — use `get_local()`. It syncs a CDN-cached Parquet/GeoParquet file to your library directory and reads from disk on subsequent calls.
206
+
207
+ ```python
208
+ # Whole-dataset path: nz_parcels from CDN-cached GeoParquet (seconds, not a 15-min Iceberg scan)
209
+ gdf = client.get_local("nz_parcels") # geopandas.GeoDataFrame when [geo] is installed
210
+ df = client.get_local("nz_cpi") # tidy DataFrame from cached Parquet
211
+
212
+ # Live path: date slices, row limits, licence-restricted sources (e.g. OECD)
213
+ df = client.get("nz_cpi", start="2020-01-01")
214
+ df = client.get("nz_cpi", limit=100)
215
+ ```
216
+
217
+ `get_local()` also accepts `cache_dir`, `format`, and `freshness` when you need to override the defaults:
218
+
219
+ ```python
220
+ # Explicit cache+sync with extra options
221
+ gdf = client.get_local("nz_parcels")
222
+ gdf = client.get_local("nz_parcels", cache_dir="/data/eolas", freshness="monthly")
223
+ df = client.get_local("nz_cpi", format="csv_gz")
224
+ ```
225
+
226
+ For advanced control over the sync lifecycle (sidecar tracking, atomic replace), use `sync_bulk()` directly. For one-shot bytes-or-path downloads, use `download_bulk()`:
227
+
228
+ ```python
229
+ r = client.sync_bulk("nz_cpi", path="nz_cpi.parquet")
230
+ # r.status ∈ {"downloaded", "unchanged", "updated"}; r.bytes_downloaded == 0 when unchanged.
231
+ path = client.download_bulk("treasury_fiscal_spending", path="t.parquet")
232
+ ```
233
+
234
+ **Progress bars:** `download_bulk`, `sync_bulk`, and `get_local` all show a `tqdm` progress bar automatically in interactive terminals and VSCode notebooks, so 1+ GB files are never silent. Pass `progress=False` to suppress in scripts, or set `EOLAS_NO_PROGRESS=1` in the environment for a CI-wide escape hatch. The `--no-progress` flag does the same from the CLI.
235
+
236
+ CLI mirror: `eolas download <name>` for one-shot, `eolas sync <name> [--watch hourly]` for an incremental check. Full docs: [docs.eolas.fyi/bulk-downloads/](https://docs.eolas.fyi/bulk-downloads/).
237
+
238
+ ## Geospatial
239
+
240
+ Datasets with a `geometry_wkt` column auto-convert to `geopandas.GeoDataFrame` if `geopandas` is installed:
241
+
242
+ ```bash
243
+ pip install eolas-data[geo]
244
+ ```
245
+
246
+ ```python
247
+ gdf = client.get("nz_addresses") # GeoDataFrame
248
+ df = client.get("nz_addresses", as_geo=False) # plain DataFrame, WKT preserved
249
+ ```
250
+
251
+ ## Working with large geo datasets
252
+
253
+ The 5.4M-row `linz.nz_parcels` table allocates ~10 GB when materialised as a GeoDataFrame. Pass `as_arrow=True` to skip all shapely allocation and get a zero-copy `pyarrow.Table` instead — geometry stays as Arrow buffers until you need it:
254
+
255
+ ```python
256
+ # Zero-copy Arrow table — no shapely allocation
257
+ tbl = client.linz("nz_parcels", as_arrow=True)
258
+
259
+ # Filter before materialising — dramatically cheaper than loading the full GeoDataFrame
260
+ import duckdb
261
+ result = duckdb.sql("""
262
+ SELECT parcel_id, geometry_wkt
263
+ FROM tbl
264
+ WHERE ST_Within(ST_GeomFromText(geometry_wkt),
265
+ ST_GeomFromText('POLYGON((174.7 -41.3, 174.8 -41.3, 174.8 -41.4, 174.7 -41.4, 174.7 -41.3))'))
266
+ """).df()
267
+ ```
268
+
269
+ `as_arrow=True` works on all datasets (geo or non-geo), all routing modes (live, cached, auto), and all source helpers. It cannot be combined with `as_geo=True`.
270
+
271
+ ## Polars
272
+
273
+ ```bash
274
+ pip install eolas-data[polars]
275
+ ```
276
+
277
+ ```python
278
+ df = client.get("nz_cpi", engine="polars")
279
+ ```
280
+
281
+ ## Plotting
282
+
283
+ `Dataset` is a `pandas.DataFrame` subclass — use matplotlib / seaborn / plotly
284
+ directly. No bundled plot helper, because there's no universal "right" plot for
285
+ a tidy dataset (single-series time series vs. wide multi-measure vs. WKT
286
+ geometry all need different code).
287
+
288
+ ```python
289
+ import matplotlib.pyplot as plt
290
+
291
+ df = client.oecd("nz_cpi")
292
+ df.plot(x="date", y="value")
293
+ plt.show()
294
+ ```
295
+
296
+ ## Type stubs
297
+
298
+ Dataset names are exposed as a `Literal` so IDEs autocomplete the catalog:
299
+
300
+ ```python
301
+ from eolas_data import Client
302
+
303
+ client = Client()
304
+ client.get("nz_") # autocomplete shows nz_cpi, nz_gdp_growth, ...
305
+ ```
306
+
307
+ The list is regenerated from the live API at release time. Passing a name not in the snapshot still works at runtime — the type hint just won't autocomplete it. Catalog snapshot date is exposed as `eolas_data._dataset_names.CATALOG_SNAPSHOT_DATE`.
308
+
309
+ ## Testing
310
+
311
+ ```bash
312
+ # unit tests (mocked HTTP — no API key needed)
313
+ pytest -q -m "not integration"
314
+
315
+ # live smoke (requires EOLAS_API_KEY)
316
+ EOLAS_API_KEY=vs_... pytest -q -m integration tests/test_smoke_live.py
317
+ ```
318
+
319
+ CI runs the unit suite on Python 3.10, 3.12, and 3.13 on every push to `master` and on every pull request. A weekly workflow (also runnable manually) runs the live smoke tests when `EOLAS_API_KEY` is configured as a repository secret, and skips them otherwise.
320
+
321
+ ## Releasing
322
+
323
+ See [`docs/clients.md`](https://github.com/phildonovan/eolas/blob/master/docs/clients.md) in the eolas data repo for the tagged-release flow and PyPI token rotation.
324
+
325
+ Before each release: `python -m eolas_data._regen_names` to refresh the dataset name stubs from the live API, commit the change, then tag and push.
326
+
327
+ ## License
328
+
329
+ MIT