eolas-data 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
name: Catalog drift detector

# Pings when the live eolas.fyi catalog has gained or lost datasets, or when
# the dataset count has changed significantly, without a corresponding library
# release. Opens (or updates) a single tracking issue rather than spamming.

on:
  schedule:
    - cron: "0 7 * * 1"  # Mondays 07:00 UTC
  workflow_dispatch:

permissions:
  issues: write
  contents: read

jobs:
  check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Compare live catalog to baseline
        id: drift
        run: |
          python <<'PY'
          import importlib.util
          import json
          import os
          import pathlib
          import secrets
          import urllib.request

          # Baseline = the snapshot we shipped in the last release.
          # Drift = anything that's changed since.
          # This job installs no dependencies, so the names module is loaded
          # by file path instead of importing the eolas_data package.
          spec = importlib.util.spec_from_file_location(
              "_dataset_names",
              pathlib.Path("eolas_data/_dataset_names.py"),
          )
          module = importlib.util.module_from_spec(spec)
          spec.loader.exec_module(module)
          baseline = set(module.ALL_NAMES)
          baseline_date = module.CATALOG_SNAPSHOT_DATE

          with urllib.request.urlopen("https://api.eolas.fyi/v1/datasets", timeout=30) as r:
              live = json.load(r)
          # Fail with a readable message if the payload is not the expected
          # JSON array of dataset objects.
          if not isinstance(live, list):
              raise SystemExit(
                  f"Unexpected catalog payload: expected a JSON array, got {type(live).__name__}"
              )
          live_names = {d["name"] for d in live}

          # NOTE(review): namespace-level drift is not evaluated. The baseline
          # used here only exposes dataset names, so there is no reliable
          # namespace list to compare against; that needs release-time metadata.

          added = sorted(live_names - baseline)
          removed = sorted(baseline - live_names)
          new_count = len(live_names)
          base_count = len(baseline)
          # max(..., 1) guards against division by zero on an empty baseline.
          delta_pct = (new_count - base_count) / max(base_count, 1) * 100

          # Threshold for "interesting" drift: many additions, any removal,
          # or a net change of at least 5% in the dataset count.
          significant = (
              len(added) >= 10
              or len(removed) >= 1
              or abs(delta_pct) >= 5.0
          )

          # Only the first 30 additions are listed to keep the issue readable.
          added_preview = ", ".join(added[:30]) or "none"
          if len(added) > 30:
              added_preview += " ..."
          removed_list = ", ".join(removed) or "none"

          summary = "\n".join(
              [
                  f"Baseline (release {baseline_date}): {base_count} datasets",
                  f"Live now: {new_count} datasets ({delta_pct:+.1f}%)",
                  "",
                  f"Added ({len(added)}): {added_preview}",
                  "",
                  f"Removed ({len(removed)}): {removed_list}",
              ]
          )
          print(summary)

          # The summary embeds names returned by an external API, so use a
          # random heredoc delimiter: a fixed one (e.g. "EOF") could be
          # matched by the content and truncate or inject step outputs.
          delimiter = f"ghadelimiter_{secrets.token_hex(16)}"
          with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
              f.write(f"significant={'true' if significant else 'false'}\n")
              f.write(f"new_count={new_count}\n")
              f.write(f"base_count={base_count}\n")
              f.write(f"summary<<{delimiter}\n{summary}\n{delimiter}\n")
          PY

      - name: Open or update drift issue
        if: steps.drift.outputs.significant == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SUMMARY: ${{ steps.drift.outputs.summary }}
          NEW_COUNT: ${{ steps.drift.outputs.new_count }}
          BASE_COUNT: ${{ steps.drift.outputs.base_count }}
        run: |
          set -euo pipefail
          TITLE="Catalog drift: ${BASE_COUNT} → ${NEW_COUNT} datasets"
          BODY=$(printf 'The live API catalog has drifted from the shipped baseline.\n\n%s\n\n---\n\nIf this delta warrants a release: bump version, run `python -m eolas_data._regen_names`, commit, tag, push.\n' "$SUMMARY")

          # Best-effort: make sure the label exists before filtering on it.
          # This fails harmlessly when the label is already present.
          gh label create catalog-drift --color BFD4F2 --description "Live catalog has drifted from shipped baseline" 2>/dev/null || true

          # Find an existing open drift issue (label `catalog-drift`) and update it; else create.
          # A failed lookup aborts the step (set -e) instead of being treated
          # as "no issue found", which would open a duplicate issue.
          EXISTING=$(gh issue list --label catalog-drift --state open --limit 1 --json number --jq '.[0].number // empty')
          if [ -n "$EXISTING" ]; then
            gh issue edit "$EXISTING" --title "$TITLE" --body "$BODY"
          else
            gh issue create --title "$TITLE" --body "$BODY" --label catalog-drift
          fi
@@ -0,0 +1,31 @@
name: Publish to PyPI

# Builds the sdist/wheel and uploads them to PyPI whenever a `v*` tag is pushed.

on:
  push:
    tags:
      - "v*"

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      # Declaring any permission sets every unlisted scope to "none", so the
      # read access that actions/checkout needs must be granted explicitly.
      contents: read
      id-token: write

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: pip install hatchling build

      - name: Build
        run: python -m build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # NOTE(review): an API token is supplied here while `id-token: write`
          # is also granted; presumably only one of the two auth mechanisms is
          # needed. Confirm which one the PyPI project is configured for.
          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,27 @@
1
+ # Python bytecode
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Test caches
7
+ .pytest_cache/
8
+ .coverage
9
+ .coverage.*
10
+ htmlcov/
11
+
12
+ # Build / dist
13
+ build/
14
+ dist/
15
+ *.egg-info/
16
+ .eggs/
17
+
18
+ # Virtual environments
19
+ .venv/
20
+ venv/
21
+ env/
22
+
23
+ # Editors
24
+ .idea/
25
+ .vscode/
26
+ *.swp
27
+ .DS_Store
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: eolas-data
3
+ Version: 1.0.0
4
+ Summary: Python client for the eolas.fyi statistical data API (NZ, Australia, OECD)
5
+ Project-URL: Homepage, https://eolas.fyi
6
+ Project-URL: Documentation, https://docs.eolas.fyi/
7
+ Project-URL: Repository, https://github.com/phildonovan/eolas-data
8
+ Project-URL: Bug Tracker, https://github.com/phildonovan/eolas-data/issues
9
+ Author-email: Virtus Solutions <phil@virtus-solutions.io>
10
+ License: MIT
11
+ Keywords: api,australia,economics,eolas,new-zealand,statistics
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Scientific/Engineering
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: pandas>=1.5
24
+ Requires-Dist: requests>=2.28
25
+ Provides-Extra: cli
26
+ Requires-Dist: rich>=13; extra == 'cli'
27
+ Requires-Dist: typer>=0.12; extra == 'cli'
28
+ Provides-Extra: dev
29
+ Requires-Dist: geopandas>=0.14; extra == 'dev'
30
+ Requires-Dist: pandas; extra == 'dev'
31
+ Requires-Dist: pytest; extra == 'dev'
32
+ Requires-Dist: responses; extra == 'dev'
33
+ Requires-Dist: rich>=13; extra == 'dev'
34
+ Requires-Dist: shapely>=2.0; extra == 'dev'
35
+ Requires-Dist: typer>=0.12; extra == 'dev'
36
+ Provides-Extra: geo
37
+ Requires-Dist: geopandas>=0.14; extra == 'geo'
38
+ Requires-Dist: shapely>=2.0; extra == 'geo'
39
+ Provides-Extra: polars
40
+ Requires-Dist: polars>=0.20; extra == 'polars'
41
+ Description-Content-Type: text/markdown
42
+
43
+ # eolas-data
44
+
45
+ Python client for the [eolas.fyi](https://eolas.fyi) statistical data API — 717+ datasets across NZ, Australia, OECD, and more, served as tidy `pandas` DataFrames (or `polars` / `geopandas` if you prefer).
46
+
47
+ ```bash
48
+ pip install eolas-data
49
+ ```
50
+
51
+ ## Quickstart
52
+
53
+ ```python
54
+ from eolas_data import Client
55
+
56
+ client = Client("your_api_key") # or set EOLAS_API_KEY in env
57
+
58
+ # Generic
59
+ df = client.get("nz_cpi", start="2020-01-01")
60
+
61
+ # Source-specific (sets the `eolas_source` metadata)
62
+ df = client.statsnz("nz_cpi")
63
+ df = client.oecd("nz_gdp_production_annual")
64
+
65
+ # Discovery
66
+ all_datasets = client.list()
67
+ nz_only = client.list("Stats NZ")
68
+ meta = client.info("nz_cpi")
69
+ ```
70
+
71
+ Get an API key at <https://eolas.fyi/signup>. Free plan is 10 requests/month; Starter is 100; Pro is unlimited.
72
+
73
+ ## Command-line interface
74
+
75
+ `pip install eolas-data[cli]` adds an `eolas` command for browsing, fetching, and
76
+ scheduling — useful for shell scripts, cron jobs, and AI-agent workflows. Output
77
+ auto-detects piping: rich tables in a terminal, newline-delimited JSON when
78
+ stdout is piped.
79
+
80
+ ```bash
81
+ # one-time setup
82
+ eolas auth set-key
83
+ eolas health
84
+
85
+ # discover
86
+ eolas datasets list --source "Stats NZ"
87
+ eolas datasets list --search cpi --json | jq '.[].name'
88
+ eolas datasets info nz_cpi
89
+ eolas datasets preview nz_cpi --limit 5
90
+
91
+ # fetch (verb matches the Python lib's client.get())
92
+ eolas get nz_cpi --format csv > cpi.csv
93
+ eolas get nz_cpi --start 2020-01-01 --format json | jq '.[].value'
94
+ eolas get sa2_2023 --format parquet --out sa2.parquet
95
+ ```
96
+
97
+ ### Scheduling
98
+
99
+ Set up recurring fetches without touching crontab/Task Scheduler syntax. Works
100
+ on Linux, macOS (cron), and Windows (Task Scheduler).
101
+
102
+ ```bash
103
+ eolas schedule add nz_cpi --daily --out ~/data/cpi.csv
104
+ eolas schedule add nz_gdp --weekly --out ~/data/gdp.csv
105
+ eolas schedule add nzd_usd --cron "0 */6 * * *" --out ~/data/fx.csv # POSIX only
106
+
107
+ eolas schedule list
108
+ eolas schedule remove nz_cpi
109
+ ```
110
+
111
+ Daily is the default. A pre-flight check refuses to install a schedule unless
112
+ your API key is configured (otherwise the job would fail silently forever).
113
+
114
+ ### Integrations (Enterprise plan)
115
+
116
+ Generate ready-to-run connector configs for popular data-pipeline tools — eolas
117
+ becomes a one-command source for Meltano, Fivetran, or Azure Data Factory.
118
+
119
+ ```bash
120
+ eolas integrate meltano --datasets nz_cpi,nz_gdp --output ./my-pipeline/
121
+ eolas integrate fivetran --datasets nz_cpi
122
+ eolas integrate azure-data-factory --datasets nz_cpi,nz_gdp
123
+ ```
124
+
125
+ The generated directory has everything needed to plug into your destination
126
+ warehouse: `meltano.yml`, `fivetran.yml`, or ADF JSON resources, plus a `README.md`
127
+ walking through the rest of the setup. Non-Enterprise users see a clear
128
+ upgrade pointer; the gating lives server-side so the capability is bypass-proof.
129
+
130
+ ### Exit codes
131
+
132
+ Distinct exit codes per error class, for shell scripts and agents:
133
+
134
+ | Code | Meaning |
135
+ |---|---|
136
+ | `0` | Success |
137
+ | `1` | Generic error |
138
+ | `2` | Auth (`AuthenticationError`, including Enterprise-gate 403) |
139
+ | `3` | Rate limit hit |
140
+ | `4` | Dataset / resource not found |
141
+ | `5` | Other API error |
142
+ | `64` | Bad usage (mirrors `sysexits.h`) |
143
+
144
+ ## Geospatial
145
+
146
+ Datasets with a `geometry_wkt` column auto-convert to `geopandas.GeoDataFrame` if `geopandas` is installed:
147
+
148
+ ```bash
149
+ pip install eolas-data[geo]
150
+ ```
151
+
152
+ ```python
153
+ gdf = client.get("nz_addresses") # GeoDataFrame
154
+ df = client.get("nz_addresses", as_geo=False) # plain DataFrame, WKT preserved
155
+ ```
156
+
157
+ ## Polars
158
+
159
+ ```bash
160
+ pip install eolas-data[polars]
161
+ ```
162
+
163
+ ```python
164
+ df = client.get("nz_cpi", engine="polars")
165
+ ```
166
+
167
+ ## Plotting
168
+
169
+ `Dataset` is a `pandas.DataFrame` subclass — use matplotlib / seaborn / plotly
170
+ directly. No bundled plot helper, because there's no universal "right" plot for
171
+ a tidy dataset (single-series time series vs. wide multi-measure vs. WKT
172
+ geometry all need different code).
173
+
174
+ ```python
175
+ import matplotlib.pyplot as plt
176
+
177
+ df = client.statsnz("nz_cpi")
178
+ df.plot(x="date", y="value")
179
+ plt.show()
180
+ ```
181
+
182
+ ## Type stubs
183
+
184
+ Dataset names are exposed as a `Literal` so IDEs autocomplete the catalog:
185
+
186
+ ```python
187
+ from eolas_data import Client
188
+
189
+ client = Client()
190
+ client.get("nz_") # autocomplete shows nz_cpi, nz_gdp_production_annual, ...
191
+ ```
192
+
193
+ The list is regenerated from the live API at release time. Passing a name not in the snapshot still works at runtime — the type hint just won't autocomplete it. Catalog snapshot date is exposed as `eolas_data._dataset_names.CATALOG_SNAPSHOT_DATE`.
194
+
195
+ ## Releasing
196
+
197
+ See [`docs/clients.md`](https://github.com/phildonovan/eolas/blob/master/docs/clients.md) in the eolas data repo for the tagged-release flow and PyPI token rotation.
198
+
199
+ Before each release, run `python -m eolas_data._regen_names` to refresh the dataset name stubs from the live API, commit the change, then tag and push.
200
+
201
+ ## License
202
+
203
+ MIT
@@ -0,0 +1,161 @@
1
+ # eolas-data
2
+
3
+ Python client for the [eolas.fyi](https://eolas.fyi) statistical data API — 717+ datasets across NZ, Australia, OECD, and more, served as tidy `pandas` DataFrames (or `polars` / `geopandas` if you prefer).
4
+
5
+ ```bash
6
+ pip install eolas-data
7
+ ```
8
+
9
+ ## Quickstart
10
+
11
+ ```python
12
+ from eolas_data import Client
13
+
14
+ client = Client("your_api_key") # or set EOLAS_API_KEY in env
15
+
16
+ # Generic
17
+ df = client.get("nz_cpi", start="2020-01-01")
18
+
19
+ # Source-specific (sets the `eolas_source` metadata)
20
+ df = client.statsnz("nz_cpi")
21
+ df = client.oecd("nz_gdp_production_annual")
22
+
23
+ # Discovery
24
+ all_datasets = client.list()
25
+ nz_only = client.list("Stats NZ")
26
+ meta = client.info("nz_cpi")
27
+ ```
28
+
29
+ Get an API key at <https://eolas.fyi/signup>. Free plan is 10 requests/month; Starter is 100; Pro is unlimited.
30
+
31
+ ## Command-line interface
32
+
33
+ `pip install eolas-data[cli]` adds an `eolas` command for browsing, fetching, and
34
+ scheduling — useful for shell scripts, cron jobs, and AI-agent workflows. Output
35
+ auto-detects piping: rich tables in a terminal, newline-delimited JSON when
36
+ stdout is piped.
37
+
38
+ ```bash
39
+ # one-time setup
40
+ eolas auth set-key
41
+ eolas health
42
+
43
+ # discover
44
+ eolas datasets list --source "Stats NZ"
45
+ eolas datasets list --search cpi --json | jq '.[].name'
46
+ eolas datasets info nz_cpi
47
+ eolas datasets preview nz_cpi --limit 5
48
+
49
+ # fetch (verb matches the Python lib's client.get())
50
+ eolas get nz_cpi --format csv > cpi.csv
51
+ eolas get nz_cpi --start 2020-01-01 --format json | jq '.[].value'
52
+ eolas get sa2_2023 --format parquet --out sa2.parquet
53
+ ```
54
+
55
+ ### Scheduling
56
+
57
+ Set up recurring fetches without touching crontab/Task Scheduler syntax. Works
58
+ on Linux, macOS (cron), and Windows (Task Scheduler).
59
+
60
+ ```bash
61
+ eolas schedule add nz_cpi --daily --out ~/data/cpi.csv
62
+ eolas schedule add nz_gdp --weekly --out ~/data/gdp.csv
63
+ eolas schedule add nzd_usd --cron "0 */6 * * *" --out ~/data/fx.csv # POSIX only
64
+
65
+ eolas schedule list
66
+ eolas schedule remove nz_cpi
67
+ ```
68
+
69
+ Daily is the default. A pre-flight check refuses to install a schedule unless
70
+ your API key is configured (otherwise the job would fail silently forever).
71
+
72
+ ### Integrations (Enterprise plan)
73
+
74
+ Generate ready-to-run connector configs for popular data-pipeline tools — eolas
75
+ becomes a one-command source for Meltano, Fivetran, or Azure Data Factory.
76
+
77
+ ```bash
78
+ eolas integrate meltano --datasets nz_cpi,nz_gdp --output ./my-pipeline/
79
+ eolas integrate fivetran --datasets nz_cpi
80
+ eolas integrate azure-data-factory --datasets nz_cpi,nz_gdp
81
+ ```
82
+
83
+ The generated directory has everything needed to plug into your destination
84
+ warehouse: `meltano.yml`, `fivetran.yml`, or ADF JSON resources, plus a `README.md`
85
+ walking through the rest of the setup. Non-Enterprise users see a clear
86
+ upgrade pointer; the gating lives server-side so the capability is bypass-proof.
87
+
88
+ ### Exit codes
89
+
90
+ Distinct exit codes per error class, for shell scripts and agents:
91
+
92
+ | Code | Meaning |
93
+ |---|---|
94
+ | `0` | Success |
95
+ | `1` | Generic error |
96
+ | `2` | Auth (`AuthenticationError`, including Enterprise-gate 403) |
97
+ | `3` | Rate limit hit |
98
+ | `4` | Dataset / resource not found |
99
+ | `5` | Other API error |
100
+ | `64` | Bad usage (mirrors `sysexits.h`) |
101
+
102
+ ## Geospatial
103
+
104
+ Datasets with a `geometry_wkt` column auto-convert to `geopandas.GeoDataFrame` if `geopandas` is installed:
105
+
106
+ ```bash
107
+ pip install eolas-data[geo]
108
+ ```
109
+
110
+ ```python
111
+ gdf = client.get("nz_addresses") # GeoDataFrame
112
+ df = client.get("nz_addresses", as_geo=False) # plain DataFrame, WKT preserved
113
+ ```
114
+
115
+ ## Polars
116
+
117
+ ```bash
118
+ pip install eolas-data[polars]
119
+ ```
120
+
121
+ ```python
122
+ df = client.get("nz_cpi", engine="polars")
123
+ ```
124
+
125
+ ## Plotting
126
+
127
+ `Dataset` is a `pandas.DataFrame` subclass — use matplotlib / seaborn / plotly
128
+ directly. No bundled plot helper, because there's no universal "right" plot for
129
+ a tidy dataset (single-series time series vs. wide multi-measure vs. WKT
130
+ geometry all need different code).
131
+
132
+ ```python
133
+ import matplotlib.pyplot as plt
134
+
135
+ df = client.statsnz("nz_cpi")
136
+ df.plot(x="date", y="value")
137
+ plt.show()
138
+ ```
139
+
140
+ ## Type stubs
141
+
142
+ Dataset names are exposed as a `Literal` so IDEs autocomplete the catalog:
143
+
144
+ ```python
145
+ from eolas_data import Client
146
+
147
+ client = Client()
148
+ client.get("nz_") # autocomplete shows nz_cpi, nz_gdp_production_annual, ...
149
+ ```
150
+
151
+ The list is regenerated from the live API at release time. Passing a name not in the snapshot still works at runtime — the type hint just won't autocomplete it. Catalog snapshot date is exposed as `eolas_data._dataset_names.CATALOG_SNAPSHOT_DATE`.
152
+
153
+ ## Releasing
154
+
155
+ See [`docs/clients.md`](https://github.com/phildonovan/eolas/blob/master/docs/clients.md) in the eolas data repo for the tagged-release flow and PyPI token rotation.
156
+
157
+ Before each release, run `python -m eolas_data._regen_names` to refresh the dataset name stubs from the live API, commit the change, then tag and push.
158
+
159
+ ## License
160
+
161
+ MIT
@@ -0,0 +1,16 @@
"""eolas-data — Python client for the eolas.fyi statistical data API.

Re-exports ``Client``, ``Dataset`` and the exception classes so that they can
be imported directly from the ``eolas_data`` package.
"""
from .client import Client
from .dataset import Dataset
from .exceptions import (
    APIError,
    AuthenticationError,
    EolasError,
    NotFoundError,
    RateLimitError,
)

# Package version string.
__version__ = "1.0.0"

# Public names exported by ``from eolas_data import *``.
__all__ = [
    "Client", "Dataset",
    "EolasError", "AuthenticationError", "RateLimitError", "NotFoundError", "APIError",
]