aemo-mcp 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aemo_mcp/__init__.py +6 -0
- aemo_mcp/cache.py +139 -0
- aemo_mcp/client.py +237 -0
- aemo_mcp/curated.py +226 -0
- aemo_mcp/data/curated/daily_summary.yaml +68 -0
- aemo_mcp/data/curated/dispatch_price.yaml +54 -0
- aemo_mcp/data/curated/dispatch_region.yaml +64 -0
- aemo_mcp/data/curated/generation_scada.yaml +75 -0
- aemo_mcp/data/curated/interconnector_flows.yaml +61 -0
- aemo_mcp/data/curated/predispatch_30min.yaml +65 -0
- aemo_mcp/data/curated/rooftop_pv.yaml +66 -0
- aemo_mcp/data/duid_snapshot.csv +357 -0
- aemo_mcp/duid_lookup.py +138 -0
- aemo_mcp/feeds.py +153 -0
- aemo_mcp/fetch.py +598 -0
- aemo_mcp/models.py +99 -0
- aemo_mcp/parsing.py +138 -0
- aemo_mcp/py.typed +0 -0
- aemo_mcp/server.py +591 -0
- aemo_mcp/shaping.py +249 -0
- aemo_mcp-0.1.2.dist-info/METADATA +283 -0
- aemo_mcp-0.1.2.dist-info/RECORD +25 -0
- aemo_mcp-0.1.2.dist-info/WHEEL +4 -0
- aemo_mcp-0.1.2.dist-info/entry_points.txt +2 -0
- aemo_mcp-0.1.2.dist-info/licenses/LICENSE +33 -0
aemo_mcp/__init__.py
ADDED
aemo_mcp/cache.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""SQLite-backed HTTP cache with per-read TTL.
|
|
2
|
+
|
|
3
|
+
Ported from rba-mcp 0.1.9. Same single-table design — TTL is evaluated at
|
|
4
|
+
read time so one cached row can satisfy different freshness windows. The
|
|
5
|
+
`kind` column lets us run targeted invalidation later.
|
|
6
|
+
|
|
7
|
+
CacheKind values are tuned to AEMO cadences:
|
|
8
|
+
- "live" → 60 seconds (5-min dispatch feeds)
|
|
9
|
+
- "half_hour" → 5 minutes (30-min feeds: rooftop PV, predispatch)
|
|
10
|
+
- "forecast" → 1 hour (longer-horizon forecast bundles)
|
|
11
|
+
- "daily" → 24 hours (daily rolled-up archives)
|
|
12
|
+
- "archive" → 7 days (MMSDM and other immutable historical files)
|
|
13
|
+
- "listing" → 30 seconds (NEMWEB directory HTML — drives latest-file detection)
|
|
14
|
+
|
|
15
|
+
NEMWEB's published files are immutable once written (filename embeds the
|
|
16
|
+
interval timestamp), so the file-body cache is effectively infinite — TTL
|
|
17
|
+
only matters for directory listings and "latest" calls.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import asyncio
|
|
22
|
+
import sqlite3
|
|
23
|
+
import time
|
|
24
|
+
from datetime import timedelta
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Literal
|
|
27
|
+
|
|
28
|
+
import aiosqlite
|
|
29
|
+
|
|
30
|
+
CacheKind = Literal["live", "half_hour", "forecast", "daily", "archive", "listing"]
|
|
31
|
+
|
|
32
|
+
DEFAULT_DB_PATH = Path.home() / ".aemo-mcp" / "cache.db"
|
|
33
|
+
|
|
34
|
+
TTL: dict[CacheKind, timedelta] = {
|
|
35
|
+
"live": timedelta(seconds=60),
|
|
36
|
+
"half_hour": timedelta(minutes=5),
|
|
37
|
+
"forecast": timedelta(hours=1),
|
|
38
|
+
"daily": timedelta(hours=24),
|
|
39
|
+
"archive": timedelta(days=7),
|
|
40
|
+
"listing": timedelta(seconds=30),
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
_SCHEMA = """
|
|
44
|
+
CREATE TABLE IF NOT EXISTS http_cache (
|
|
45
|
+
cache_key TEXT PRIMARY KEY,
|
|
46
|
+
payload BLOB NOT NULL,
|
|
47
|
+
cached_at REAL NOT NULL,
|
|
48
|
+
kind TEXT NOT NULL
|
|
49
|
+
);
|
|
50
|
+
CREATE INDEX IF NOT EXISTS idx_kind_cached_at ON http_cache(kind, cached_at);
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Cache:
    """Async wrapper over the SQLite HTTP cache (see module docstring)."""

    def __init__(self, db_path: Path | None = None) -> None:
        # Resolve DEFAULT_DB_PATH at construction time (not class-def time)
        # so tests that monkeypatch the module-level constant take effect.
        # A Path default argument would be evaluated once at class definition,
        # which is too early to override — that leaked across tests previously.
        import aemo_mcp.cache as _self_mod

        self.db_path = db_path or _self_mod.DEFAULT_DB_PATH
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._initialized = False
        self._init_lock = asyncio.Lock()

    async def _ensure_init(self) -> None:
        """Create the schema exactly once, tolerating concurrent callers."""
        if self._initialized:
            return
        async with self._init_lock:
            # Double-checked: another coroutine may have finished while we
            # waited on the lock.
            if self._initialized:
                return
            try:
                await self._init_schema()
            except sqlite3.DatabaseError:
                # Pre-existing cache.db is corrupt or has an incompatible
                # schema. The cache is a perf optimisation, not a source of
                # truth — drop and recreate is always safe.
                self.db_path.unlink(missing_ok=True)
                await self._init_schema()
            self._initialized = True

    async def _init_schema(self) -> None:
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute("PRAGMA journal_mode=WAL")
            await db.executescript(_SCHEMA)
            await db.commit()

    async def get(self, key: str, ttl: timedelta) -> bytes | None:
        """Return the payload for `key` if cached within `ttl`, else None."""
        await self._ensure_init()
        oldest_allowed = time.time() - ttl.total_seconds()
        async with aiosqlite.connect(self.db_path) as db:
            async with db.execute(
                "SELECT payload FROM http_cache WHERE cache_key = ? AND cached_at >= ?",
                (key, oldest_allowed),
            ) as cursor:
                hit = await cursor.fetchone()
        return hit[0] if hit else None

    async def get_stale(self, key: str) -> tuple[bytes, float] | None:
        """Return cached (payload, cached_at_epoch) regardless of TTL.

        Used by the client as a fallback when NEMWEB is unavailable —
        graceful degradation per CLAUDE.md quality dimension #4. The caller
        computes "how stale" from the timestamp and surfaces it in
        `DataResponse.stale_reason`.
        """
        await self._ensure_init()
        async with aiosqlite.connect(self.db_path) as db:
            async with db.execute(
                "SELECT payload, cached_at FROM http_cache WHERE cache_key = ?",
                (key,),
            ) as cursor:
                hit = await cursor.fetchone()
        return (hit[0], hit[1]) if hit else None

    async def set(self, key: str, value: bytes, kind: CacheKind) -> None:
        """Insert or refresh the row for `key` with the current timestamp."""
        await self._ensure_init()
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                """
                INSERT INTO http_cache (cache_key, payload, cached_at, kind)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(cache_key) DO UPDATE SET
                    payload = excluded.payload,
                    cached_at = excluded.cached_at,
                    kind = excluded.kind
                """,
                (key, value, time.time(), kind),
            )
            await db.commit()

    async def clear(self, kind: CacheKind | None = None) -> None:
        """Delete all rows, or only the rows of one `kind` when given."""
        await self._ensure_init()
        async with aiosqlite.connect(self.db_path) as db:
            if kind:
                await db.execute("DELETE FROM http_cache WHERE kind = ?", (kind,))
            else:
                await db.execute("DELETE FROM http_cache")
            await db.commit()
|
aemo_mcp/client.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Async NEMWEB fetcher.
|
|
2
|
+
|
|
3
|
+
NEMWEB is a static IIS file server. There is no API — directory listings are
|
|
4
|
+
HTML; files are ZIPs containing CSVs.
|
|
5
|
+
|
|
6
|
+
Three responsibilities:
|
|
7
|
+
|
|
8
|
+
1. `fetch_directory_listing(folder)` — GET the IIS HTML and return the list
|
|
9
|
+
of file names matching a regex.
|
|
10
|
+
2. `fetch_zip(url)` — GET a single ZIP file and return its bytes.
|
|
11
|
+
3. Cache + in-flight dedup — concurrent callers for the same URL share one
|
|
12
|
+
HTTP request. Critical at 5-min cadence where 50 concurrent `latest()`
|
|
13
|
+
calls would otherwise hammer NEMWEB.
|
|
14
|
+
|
|
15
|
+
NEMWEB's published files are immutable once written (filename embeds the
|
|
16
|
+
interval timestamp), so the file-body cache TTL is effectively infinite.
|
|
17
|
+
Only the directory listing has freshness sensitivity — that's the 30s TTL.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import asyncio
|
|
22
|
+
import re
|
|
23
|
+
import time
|
|
24
|
+
from contextvars import ContextVar
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
import httpx
|
|
28
|
+
|
|
29
|
+
from .cache import TTL, Cache, CacheKind
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ─── stale signal (graceful-degradation reporting per CLAUDE.md dim #4) ─
|
|
33
|
+
# When NEMWEB is unreachable, _fetch_cached falls back to the cached payload
|
|
34
|
+
# regardless of TTL and records the staleness in this ContextVar. Server-side
|
|
35
|
+
# tool wrappers read it after the request chain and set
|
|
36
|
+
# DataResponse.stale / .stale_reason. ContextVar (not instance attr) so
|
|
37
|
+
# concurrent MCP tool calls each see their own state.
|
|
38
|
+
_stale_signal: ContextVar[tuple[bool, str | None]] = ContextVar(
|
|
39
|
+
"aemo_mcp_stale_signal", default=(False, None)
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def reset_stale_signal() -> None:
    """Reset the per-context stale flag; invoke at the start of each tool call."""
    _stale_signal.set((False, None))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_stale_signal() -> tuple[bool, str | None]:
    """Report (stale, reason) recorded by the latest fetch chain in this context."""
    return _stale_signal.get()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _mark_stale(reason: str) -> None:
    """Record that a stale-cache fallback was served in this context.

    Only the FIRST reason is kept when several fetches in one chain go
    stale — the originating upstream failure is usually the most
    informative one.
    """
    already_stale, _existing_reason = _stale_signal.get()
    if already_stale:
        return
    _stale_signal.set((True, reason))
|
|
62
|
+
|
|
63
|
+
# NEMWEB serves over plain HTTP; base URL is overridable per-client in tests.
DEFAULT_BASE_URL = "http://nemweb.com.au"
# 60s total budget (ZIPs can be large), 10s to establish the connection.
DEFAULT_TIMEOUT = httpx.Timeout(60.0, connect=10.0)

# IIS directory listings render entries as <A HREF="path">name</A>.
# Capture the file name (last segment after the slash) — relative or absolute.
_HREF_PATTERN = re.compile(
    r'<a\s+href="([^"]+)"',
    re.IGNORECASE,
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class AEMOAPIError(Exception):
    """Signals a failed NEMWEB request — transport error or non-2xx status."""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class AEMOClient:
    """Cached, in-flight-deduplicated async HTTP client for NEMWEB.

    See the module docstring for the overall design: directory listings are
    parsed from IIS HTML; file bodies are immutable ZIPs cached via `Cache`.
    """

    def __init__(
        self,
        cache: Cache | None = None,
        base_url: str = DEFAULT_BASE_URL,
        transport: httpx.AsyncBaseTransport | None = None,
        user_agent: str | None = None,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.cache = cache or Cache()
        ua = user_agent or "aemo-mcp/0.1 (+https://github.com/Bigred97/aemo-mcp)"
        self._http = httpx.AsyncClient(
            timeout=DEFAULT_TIMEOUT,
            transport=transport,
            headers={"User-Agent": ua},
            follow_redirects=True,
        )
        # url → shared Future for requests currently on the wire.
        self._in_flight: dict[str, asyncio.Future[bytes]] = {}
        self._in_flight_lock = asyncio.Lock()

    async def aclose(self) -> None:
        await self._http.aclose()

    async def __aenter__(self) -> "AEMOClient":
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.aclose()

    @staticmethod
    def _normalize_folder(folder: str) -> str:
        """Return `folder` with exactly one leading and one trailing slash added
        where missing (shared by fetch_directory_listing and build_url)."""
        if not folder.startswith("/"):
            folder = "/" + folder
        if not folder.endswith("/"):
            folder = folder + "/"
        return folder

    async def _fetch_cached(self, url: str, *, kind: CacheKind) -> bytes:
        """Generic cached + in-flight-deduped fetch.

        Concurrent callers for the same URL share one in-flight HTTP request.
        Raises AEMOAPIError when the request fails and no cached fallback
        exists.
        """
        cached = await self.cache.get(url, ttl=TTL[kind])
        if cached is not None:
            return cached

        # Race-safe in-flight registration.
        async with self._in_flight_lock:
            existing = self._in_flight.get(url)
            if existing is None:
                future: asyncio.Future[bytes] = (
                    asyncio.get_running_loop().create_future()
                )
                self._in_flight[url] = future

        if existing is not None:
            return await existing

        try:
            try:
                resp = await self._http.get(url)
                resp.raise_for_status()
            except (httpx.HTTPStatusError, httpx.RequestError) as e:
                # Graceful degradation: when NEMWEB is unreachable, fall back
                # to the most-recent cached payload (regardless of TTL) rather
                # than raising and breaking the agent's chain of reasoning.
                # The staleness is surfaced via the _stale_signal ContextVar
                # and ends up in DataResponse.stale / stale_reason.
                fallback = await self.cache.get_stale(url)
                if fallback is not None:
                    payload, cached_at = fallback
                    age_min = max(0, int((time.time() - cached_at) / 60))
                    if isinstance(e, httpx.HTTPStatusError):
                        upstream = (
                            f"AEMO/OpenNEM fetch returned "
                            f"{e.response.status_code}"
                        )
                    else:
                        upstream = (
                            f"AEMO/OpenNEM fetch unreachable "
                            f"({type(e).__name__})"
                        )
                    _mark_stale(
                        f"{upstream} for {url}; serving cached payload from "
                        f"~{age_min} minute(s) ago"
                    )
                    future.set_result(payload)
                    return payload
                # Genuinely no cache to fall back to — preserve original behaviour
                if isinstance(e, httpx.HTTPStatusError):
                    raise AEMOAPIError(
                        f"NEMWEB returned {e.response.status_code} for {url}"
                    ) from e
                raise AEMOAPIError(f"NEMWEB request failed: {e}") from e
            await self.cache.set(url, resp.content, kind=kind)
            future.set_result(resp.content)
            return resp.content
        except BaseException as e:
            if not future.done():
                future.set_exception(e)
                # When no other coroutine is awaiting this future, the
                # exception we just set would be GC'd unretrieved and Python
                # would log "Future exception was never retrieved". Mark it
                # retrieved here — the calling coroutine still gets `raise` so
                # the exception propagates normally.
                try:
                    future.exception()
                except Exception:
                    pass
            raise
        finally:
            async with self._in_flight_lock:
                self._in_flight.pop(url, None)

    async def fetch_directory_listing(
        self,
        folder: str,
        *,
        filename_regex: re.Pattern[str] | None = None,
        kind: CacheKind = "listing",
    ) -> list[str]:
        """Fetch a NEMWEB folder's HTML listing and return sorted file names.

        `folder` is a path under `base_url`, e.g.
        '/Reports/Current/DispatchIS_Reports/'. Trailing slash optional.

        If `filename_regex` is provided, only file names that fully match it
        are returned. The list is sorted ascending — for AEMO filename
        patterns (timestamp-prefixed) this means the LAST entry is the most
        recent.
        """
        url = f"{self.base_url}{self._normalize_folder(folder)}"
        body = await self._fetch_cached(url, kind=kind)
        text = body.decode("utf-8", errors="replace")

        names: list[str] = []
        for match in _HREF_PATTERN.finditer(text):
            href = match.group(1)
            # Normalise: drop everything up to the final slash.
            name = href.rsplit("/", 1)[-1]
            if not name or name in (".", ".."):
                continue
            if filename_regex is not None and not filename_regex.fullmatch(name):
                continue
            names.append(name)
        names.sort()
        return names

    async def fetch_zip(self, url: str, *, kind: CacheKind = "archive") -> bytes:
        """Fetch a single NEMWEB ZIP file. Returns raw bytes.

        Timestamped files are immutable — once a file with name
        `PUBLIC_DISPATCHIS_202605141000_X.zip` exists it never changes. Cache
        with `archive` TTL (7 days) by default. Caller can override for
        feeds where files might be republished.
        """
        return await self._fetch_cached(url, kind=kind)

    def build_url(self, folder: str, filename: str) -> str:
        """Absolute URL for `filename` under NEMWEB `folder`.

        Fix: the previous implementation interpolated the literal text
        "(unknown)" instead of the `filename` argument (which was unused),
        producing unusable URLs for every caller.
        """
        return f"{self.base_url}{self._normalize_folder(folder)}{filename}"
|
aemo_mcp/curated.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Hand-curated metadata for the 7 NEM datasets.
|
|
2
|
+
|
|
3
|
+
Each YAML in `data/curated/` describes one dataset:
|
|
4
|
+
- backend folder + filename pattern (regex)
|
|
5
|
+
- multi-section CSV section name(s) to extract
|
|
6
|
+
- filter dimensions (region, interconnector, metric, ...)
|
|
7
|
+
- cadence + cache TTL kind
|
|
8
|
+
- units + plain-English search keywords
|
|
9
|
+
|
|
10
|
+
Loaded once at import time and stored in a frozen dataclass registry.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from importlib import resources
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import yaml
|
|
21
|
+
|
|
22
|
+
from .cache import CacheKind
|
|
23
|
+
from .models import DatasetDetail, DatasetFilter
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class CuratedFilter:
    """One filter dimension a caller may supply for a curated dataset."""

    # User-facing filter key (e.g. "region").
    key: str
    # Plain-English description (surfaced via CuratedDataset.to_detail).
    description: str
    # Allowed values for this filter, when the set is closed; may be empty.
    values: tuple[str, ...] = ()
    # Whether callers must supply this filter.
    required: bool = False
    # Maps user-facing filter values → CSV row predicates. Each predicate is
    # a dict {column: value}. Most filters map 1:1 to a single column.
    column: str | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class CuratedMetric:
    """One numeric metric extracted from the CSV section."""

    key: str  # response-side key (e.g. "rrp")
    source_column: str  # column header in the CSV section
    description: str  # plain-English
    unit: str  # e.g. "$/MWh"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass(frozen=True)
class CuratedSection:
    """One CSV section to extract from the fetched ZIP."""

    name: str  # e.g. "DISPATCH.PRICE"
    # When a single dataset stitches together multiple sections (eg.
    # `rooftop_pv` combines ACTUAL + FORECAST from two different folders),
    # `discriminator` distinguishes them in the output dimensions.
    discriminator: str | None = None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class CuratedFolder:
    """One NEMWEB folder this dataset fetches from."""

    path: str  # e.g. "/Reports/Current/DispatchIS_Reports/"
    filename_regex: str  # e.g. "PUBLIC_DISPATCHIS_.*\\.zip"
    sections: tuple[CuratedSection, ...] = ()
    discriminator: str | None = None  # see CuratedSection.discriminator
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class CuratedDataset:
    """Frozen description of one curated NEM dataset, loaded from YAML."""

    id: str
    name: str
    description: str
    cadence: str  # plain-English: "5 min" / "30 min" / "Daily"
    cache_kind: CacheKind
    folders: tuple[CuratedFolder, ...]
    filters: tuple[CuratedFilter, ...] = ()
    metrics: tuple[CuratedMetric, ...] = ()
    settlement_column: str = "SETTLEMENTDATE"  # AEMO-standard period column
    source_url: str = "http://nemweb.com.au/Reports/Current/"
    search_keywords: tuple[str, ...] = ()
    examples: tuple[str, ...] = ()

    def to_detail(self) -> DatasetDetail:
        """Project this dataset into the public DatasetDetail model."""
        detail_filters = [
            DatasetFilter(
                key=flt.key,
                description=flt.description,
                values=list(flt.values),
                required=flt.required,
            )
            for flt in self.filters
        ]
        unit_map = {metric.key: metric.unit for metric in self.metrics}
        return DatasetDetail(
            id=self.id,
            name=self.name,
            description=self.description,
            is_curated=True,
            cadence=self.cadence,
            filters=detail_filters,
            units=unit_map,
            source_url=self.source_url,
            examples=list(self.examples),
        )

    def get_filter(self, key: str) -> CuratedFilter | None:
        """Return the filter whose key matches, or None if absent."""
        return next((flt for flt in self.filters if flt.key == key), None)

    def get_metric(self, key: str) -> CuratedMetric | None:
        """Return the metric whose key matches, or None if absent."""
        return next((metric for metric in self.metrics if metric.key == key), None)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Lazily-populated singleton registry; built on first access by _registry().
_REGISTRY: dict[str, CuratedDataset] | None = None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _yaml_dir() -> Path:
|
|
118
|
+
try:
|
|
119
|
+
ref = resources.files("aemo_mcp").joinpath("data/curated")
|
|
120
|
+
if ref.is_dir():
|
|
121
|
+
return Path(str(ref))
|
|
122
|
+
except (ModuleNotFoundError, AttributeError):
|
|
123
|
+
pass
|
|
124
|
+
here = Path(__file__).resolve().parent / "data" / "curated"
|
|
125
|
+
if here.is_dir():
|
|
126
|
+
return here
|
|
127
|
+
raise FileNotFoundError("Could not locate aemo_mcp/data/curated/")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _parse_filter(raw: dict[str, Any]) -> CuratedFilter:
    """Build one CuratedFilter from its YAML mapping."""
    allowed_values = tuple(raw.get("values") or ())
    return CuratedFilter(
        key=str(raw["key"]),
        description=str(raw.get("description", "")),
        values=allowed_values,
        required=bool(raw.get("required", False)),
        column=raw.get("column"),
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _parse_metric(raw: dict[str, Any]) -> CuratedMetric:
    """Build one CuratedMetric from its YAML mapping."""
    return CuratedMetric(
        key=str(raw["key"]),
        source_column=str(raw["source_column"]),
        description=str(raw.get("description", "")),
        unit=str(raw.get("unit", "")),
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _parse_section(raw: dict[str, Any] | str) -> CuratedSection:
    """Build one CuratedSection; a bare string is shorthand for just a name."""
    if isinstance(raw, str):
        return CuratedSection(name=raw)
    return CuratedSection(
        name=str(raw["name"]),
        discriminator=raw.get("discriminator"),
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _parse_folder(raw: dict[str, Any]) -> CuratedFolder:
    """Build one CuratedFolder (path, filename pattern, CSV sections)."""
    return CuratedFolder(
        path=str(raw["path"]),
        filename_regex=str(raw["filename_regex"]),
        sections=tuple(_parse_section(s) for s in (raw.get("sections") or [])),
        discriminator=raw.get("discriminator"),
    )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _load_one(path: Path) -> CuratedDataset:
    """Parse a single curated-dataset YAML file into a CuratedDataset.

    Raises ValueError when the file declares no folders (every dataset
    needs at least one NEMWEB folder to fetch from).
    """
    raw = yaml.safe_load(path.read_text(encoding="utf-8"))

    folders = tuple(_parse_folder(f) for f in (raw.get("folders") or []))
    if not folders:
        raise ValueError(f"Curated YAML {path.name} has no folders")

    return CuratedDataset(
        id=str(raw["id"]),
        name=str(raw["name"]),
        description=str(raw.get("description", "")),
        cadence=str(raw.get("cadence", "")),
        cache_kind=str(raw.get("cache_kind", "live")),  # type: ignore[arg-type]
        folders=folders,
        filters=tuple(_parse_filter(f) for f in (raw.get("filters") or [])),
        metrics=tuple(_parse_metric(m) for m in (raw.get("metrics") or [])),
        settlement_column=str(raw.get("settlement_column", "SETTLEMENTDATE")),
        source_url=str(raw.get("source_url", "http://nemweb.com.au/Reports/Current/")),
        search_keywords=tuple(raw.get("search_keywords") or ()),
        examples=tuple(raw.get("examples") or ()),
    )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _load_all() -> dict[str, CuratedDataset]:
    """Load every curated YAML, keyed by dataset id (sorted file order)."""
    datasets = (_load_one(p) for p in sorted(_yaml_dir().glob("*.yaml")))
    return {ds.id: ds for ds in datasets}
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _registry() -> dict[str, CuratedDataset]:
    """Return the singleton registry, loading the YAML files on first call."""
    global _REGISTRY
    if _REGISTRY is None:
        _REGISTRY = _load_all()
    return _REGISTRY
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def get(dataset_id: str) -> CuratedDataset | None:
    """Look up a curated dataset by id; whitespace- and case-insensitive."""
    normalized_id = dataset_id.strip().lower()
    return _registry().get(normalized_id)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def list_ids() -> list[str]:
    """All curated dataset ids, sorted ascending."""
    return sorted(_registry())
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def list_all() -> list[CuratedDataset]:
    """All curated datasets, ordered by id."""
    registry = _registry()
    return [registry[dataset_id] for dataset_id in list_ids()]
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def reset_registry() -> None:
    """Discard the memoized registry; the next access reloads the YAML files."""
    global _REGISTRY
    _REGISTRY = None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def compile_filename_regex(folder: CuratedFolder) -> re.Pattern[str]:
    """Compile this folder's filename pattern.

    The `re` module memoizes compiled patterns internally, so repeated calls
    for the same folder are cheap (the frozen dataclass can't cache it).
    """
    return re.compile(folder.filename_regex)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
id: daily_summary
|
|
2
|
+
name: NEM Daily Summary (settled, prior trading day)
|
|
3
|
+
description: |
|
|
4
|
+
Daily rolled-up summary of the previous trading day's dispatch — settled
|
|
5
|
+
prices and demand by region. Source: PUBLIC_DAILY / DREGION.
|
|
6
|
+
|
|
7
|
+
AEMO publishes a single compendium ZIP shortly after 04:10 AEST each day
|
|
8
|
+
covering yesterday's full data, including post-AP (after-price) revisions.
|
|
9
|
+
Each row is one 5-min interval × region for the prior trading day, so a
|
|
10
|
+
single response has 288 intervals × 5 regions = 1,440 rows by default.
|
|
11
|
+
Use this for backfill and "weekly average dispatch price for VIC, last 4
|
|
12
|
+
weeks" type questions where you want settled values rather than the
|
|
13
|
+
preliminary dispatch values served by `dispatch_price`.
|
|
14
|
+
cadence: Daily
|
|
15
|
+
cache_kind: daily
|
|
16
|
+
settlement_column: SETTLEMENTDATE
|
|
17
|
+
source_url: http://nemweb.com.au/Reports/Current/Daily_Reports/
|
|
18
|
+
|
|
19
|
+
folders:
|
|
20
|
+
- path: /Reports/Current/Daily_Reports/
|
|
21
|
+
filename_regex: "PUBLIC_DAILY_\\d{12}_\\d{14}\\.zip"
|
|
22
|
+
sections:
|
|
23
|
+
# AEMO emits the daily region rollup as I,DREGION,,2,... — the
|
|
24
|
+
# second cell is empty, so the parser builds the section name
|
|
25
|
+
# "DREGION." (trailing dot). DREGION. is repeated v2 + v3 in the
|
|
26
|
+
# same file for old vs new schema; we read both and dedupe by
|
|
27
|
+
# (REGIONID, SETTLEMENTDATE) downstream.
|
|
28
|
+
- name: DREGION.
|
|
29
|
+
|
|
30
|
+
filters:
|
|
31
|
+
- key: region
|
|
32
|
+
description: NEM region — NSW1, QLD1, SA1, TAS1, VIC1.
|
|
33
|
+
values: [NSW1, QLD1, SA1, TAS1, VIC1]
|
|
34
|
+
column: REGIONID
|
|
35
|
+
|
|
36
|
+
metrics:
|
|
37
|
+
- key: rrp
|
|
38
|
+
source_column: RRP
|
|
39
|
+
description: Settled regional reference price ($/MWh)
|
|
40
|
+
unit: $/MWh
|
|
41
|
+
- key: total_demand
|
|
42
|
+
source_column: TOTALDEMAND
|
|
43
|
+
description: Total demand at the interval (MW)
|
|
44
|
+
unit: MW
|
|
45
|
+
- key: dispatchable_generation
|
|
46
|
+
source_column: DISPATCHABLEGENERATION
|
|
47
|
+
description: Dispatchable generation (scheduled + semi-scheduled, MW)
|
|
48
|
+
unit: MW
|
|
49
|
+
- key: net_interchange
|
|
50
|
+
source_column: NETINTERCHANGE
|
|
51
|
+
description: Net interconnector flow (MW). Positive = export.
|
|
52
|
+
unit: MW
|
|
53
|
+
|
|
54
|
+
search_keywords:
|
|
55
|
+
- daily summary
|
|
56
|
+
- daily report
|
|
57
|
+
- yesterday
|
|
58
|
+
- settled price
|
|
59
|
+
- settlement
|
|
60
|
+
- post ap
|
|
61
|
+
- daily aggregate
|
|
62
|
+
- daily price
|
|
63
|
+
- settled rrp
|
|
64
|
+
- daily settlement
|
|
65
|
+
|
|
66
|
+
examples:
|
|
67
|
+
- 'latest(dataset_id="daily_summary", filters={"region": "NSW1"}) # yesterday''s settled NSW prices'
|
|
68
|
+
- 'get_data(dataset_id="daily_summary", filters={"region": "VIC1"}, start_period="2026-04-17", end_period="2026-05-13") # 4 weeks of VIC settled dispatch'
|