aemo-mcp 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aemo_mcp/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ from importlib.metadata import PackageNotFoundError, version as _version
2
+
3
# Start from the placeholder and overwrite it only when the installed
# distribution's metadata can be read (it may be missing for an editable
# install before metadata is generated).
__version__ = "0.0.0+unknown"
try:
    __version__ = _version("aemo-mcp")
except PackageNotFoundError:
    pass
aemo_mcp/cache.py ADDED
@@ -0,0 +1,139 @@
1
+ """SQLite-backed HTTP cache with per-read TTL.
2
+
3
+ Ported from rba-mcp 0.1.9. Same single-table design — TTL is evaluated at
4
+ read time so one cached row can satisfy different freshness windows. The
5
+ `kind` column lets us run targeted invalidation later.
6
+
7
+ CacheKind values are tuned to AEMO cadences:
8
+ - "live" → 60 seconds (5-min dispatch feeds)
9
+ - "half_hour" → 5 minutes (30-min feeds: rooftop PV, predispatch)
10
+ - "forecast" → 1 hour (longer-horizon forecast bundles)
11
+ - "daily" → 24 hours (daily rolled-up archives)
12
+ - "archive" → 7 days (MMSDM and other immutable historical files)
13
+ - "listing" → 30 seconds (NEMWEB directory HTML — drives latest-file detection)
14
+
15
+ NEMWEB's published files are immutable once written (filename embeds the
16
+ interval timestamp), so the file-body cache is effectively infinite — TTL
17
+ only matters for directory listings and "latest" calls.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import sqlite3
23
+ import time
24
+ from datetime import timedelta
25
+ from pathlib import Path
26
+ from typing import Literal
27
+
28
+ import aiosqlite
29
+
30
# Closed set of freshness classes a cached row can be tagged with.
CacheKind = Literal["live", "half_hour", "forecast", "daily", "archive", "listing"]

# Default on-disk location of the cache database, under the user's home.
DEFAULT_DB_PATH = Path.home().joinpath(".aemo-mcp", "cache.db")

# Maximum accepted age for each kind; callers pass these to `Cache.get`.
TTL: dict[CacheKind, timedelta] = dict(
    live=timedelta(seconds=60),
    half_hour=timedelta(minutes=5),
    forecast=timedelta(hours=1),
    daily=timedelta(hours=24),
    archive=timedelta(days=7),
    listing=timedelta(seconds=30),
)
42
+
43
+ _SCHEMA = """
44
+ CREATE TABLE IF NOT EXISTS http_cache (
45
+ cache_key TEXT PRIMARY KEY,
46
+ payload BLOB NOT NULL,
47
+ cached_at REAL NOT NULL,
48
+ kind TEXT NOT NULL
49
+ );
50
+ CREATE INDEX IF NOT EXISTS idx_kind_cached_at ON http_cache(kind, cached_at);
51
+ """
52
+
53
+
54
class Cache:
    """SQLite-backed store for HTTP payloads with TTL applied at read time.

    Every operation opens its own short-lived `aiosqlite` connection to
    `db_path`. Rows record when they were written; `get` applies the
    caller's TTL when reading, while `get_stale` ignores age entirely.
    """

    # Sidecar files SQLite keeps next to the main database file in WAL mode.
    _SIDECAR_SUFFIXES = ("-wal", "-shm")

    def __init__(self, db_path: Path | None = None) -> None:
        """Record the database location and create its parent directory.

        Args:
            db_path: Database file to use. Falls back to the module-level
                `DEFAULT_DB_PATH` when omitted.
        """
        # Resolve DEFAULT_DB_PATH at construction time (not class-def time)
        # so tests that monkeypatch the module-level constant take effect.
        import aemo_mcp.cache as _self_mod

        self.db_path = db_path or _self_mod.DEFAULT_DB_PATH
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._initialized = False
        self._init_lock = asyncio.Lock()

    async def _ensure_init(self) -> None:
        """Create the schema once, rebuilding the database if it is unusable.

        The `_initialized` flag is checked again after acquiring the lock so
        concurrent first callers run the schema setup only once.
        """
        if self._initialized:
            return
        async with self._init_lock:
            if self._initialized:
                return
            try:
                await self._init_schema()
            except sqlite3.DatabaseError:
                # Pre-existing cache.db is corrupt or has an incompatible
                # schema. The cache is a perf optimisation, not a source of
                # truth — drop and recreate is always safe.
                self._drop_database()
                await self._init_schema()
            self._initialized = True

    def _drop_database(self) -> None:
        """Delete the database file together with its WAL-mode sidecars.

        The schema setup switches the database to WAL journaling, so a
        broken database can leave `-wal` / `-shm` files behind. Removing
        only the main file would pair those leftovers with the freshly
        created database.
        """
        self.db_path.unlink(missing_ok=True)
        for suffix in self._SIDECAR_SUFFIXES:
            sidecar = self.db_path.with_name(self.db_path.name + suffix)
            sidecar.unlink(missing_ok=True)

    async def _init_schema(self) -> None:
        """Enable WAL journaling and create the table/index if missing."""
        async with aiosqlite.connect(self.db_path) as conn:
            await conn.execute("PRAGMA journal_mode=WAL")
            await conn.executescript(_SCHEMA)
            await conn.commit()

    async def get(self, key: str, ttl: timedelta) -> bytes | None:
        """Return the payload for `key` if it was cached within `ttl`.

        Returns:
            The stored bytes, or None when the key is absent or the row is
            older than `ttl`.
        """
        await self._ensure_init()
        # Oldest `cached_at` epoch still considered fresh.
        cutoff = time.time() - ttl.total_seconds()
        async with aiosqlite.connect(self.db_path) as conn:
            async with conn.execute(
                "SELECT payload FROM http_cache WHERE cache_key = ? AND cached_at >= ?",
                (key, cutoff),
            ) as cur:
                row = await cur.fetchone()
        return row[0] if row else None

    async def get_stale(self, key: str) -> tuple[bytes, float] | None:
        """Return cached (payload, cached_at_epoch) regardless of TTL.

        Used by the client as a fallback when NEMWEB is unavailable. The
        caller computes "how stale" from the timestamp.

        Returns:
            `(payload, cached_at)` or None when the key was never cached.
        """
        await self._ensure_init()
        async with aiosqlite.connect(self.db_path) as conn:
            async with conn.execute(
                "SELECT payload, cached_at FROM http_cache WHERE cache_key = ?",
                (key,),
            ) as cur:
                row = await cur.fetchone()
        return (row[0], row[1]) if row else None

    async def set(self, key: str, value: bytes, kind: CacheKind) -> None:
        """Insert or overwrite the row for `key`, stamping it with the current time."""
        await self._ensure_init()
        async with aiosqlite.connect(self.db_path) as conn:
            await conn.execute(
                """
                INSERT INTO http_cache (cache_key, payload, cached_at, kind)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(cache_key) DO UPDATE SET
                    payload = excluded.payload,
                    cached_at = excluded.cached_at,
                    kind = excluded.kind
                """,
                (key, value, time.time(), kind),
            )
            await conn.commit()

    async def clear(self, kind: CacheKind | None = None) -> None:
        """Delete the rows tagged `kind`, or every row when `kind` is omitted."""
        await self._ensure_init()
        async with aiosqlite.connect(self.db_path) as conn:
            if kind:
                await conn.execute("DELETE FROM http_cache WHERE kind = ?", (kind,))
            else:
                await conn.execute("DELETE FROM http_cache")
            await conn.commit()
aemo_mcp/client.py ADDED
@@ -0,0 +1,237 @@
1
+ """Async NEMWEB fetcher.
2
+
3
+ NEMWEB is a static IIS file server. There is no API — directory listings are
4
+ HTML; files are ZIPs containing CSVs.
5
+
6
+ Three responsibilities:
7
+
8
+ 1. `fetch_directory_listing(folder)` — GET the IIS HTML and return the list
9
+ of file names matching a regex.
10
+ 2. `fetch_zip(url)` — GET a single ZIP file and return its bytes.
11
+ 3. Cache + in-flight dedup — concurrent callers for the same URL share one
12
+ HTTP request. Critical at 5-min cadence where 50 concurrent `latest()`
13
+ calls would otherwise hammer NEMWEB.
14
+
15
+ NEMWEB's published files are immutable once written (filename embeds the
16
+ interval timestamp), so the file-body cache TTL is effectively infinite.
17
+ Only the directory listing has freshness sensitivity — that's the 30s TTL.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import re
23
+ import time
24
+ from contextvars import ContextVar
25
+ from typing import Any
26
+
27
+ import httpx
28
+
29
+ from .cache import TTL, Cache, CacheKind
30
+
31
+
32
+ # ─── stale signal (graceful-degradation reporting per CLAUDE.md dim #4) ─
33
+ # When NEMWEB is unreachable, _fetch_cached falls back to the cached payload
34
+ # regardless of TTL and records the staleness in this ContextVar. Server-side
35
+ # tool wrappers read it after the request chain and set
36
+ # DataResponse.stale / .stale_reason. ContextVar (not instance attr) so
37
+ # concurrent MCP tool calls each see their own state.
38
_stale_signal: ContextVar[tuple[bool, str | None]] = ContextVar(
    "aemo_mcp_stale_signal",
    default=(False, None),
)


def reset_stale_signal() -> None:
    """Put the signal back to "not stale"; meant to run at the start of each tool call."""
    _stale_signal.set((False, None))


def get_stale_signal() -> tuple[bool, str | None]:
    """Return the `(stale, reason)` pair currently stored for this context."""
    return _stale_signal.get()


def _mark_stale(reason: str) -> None:
    """Flag the current context as having been served a stale payload.

    Only the first reason recorded in a context is kept; later calls leave
    the stored pair untouched until `reset_stale_signal` runs.
    """
    already_stale = _stale_signal.get()[0]
    if already_stale:
        return
    _stale_signal.set((True, reason))
62
+
63
+ DEFAULT_BASE_URL = "http://nemweb.com.au"
64
+ DEFAULT_TIMEOUT = httpx.Timeout(60.0, connect=10.0)
65
+
66
+ # IIS directory listings render entries as <A HREF="path">name</A>.
67
+ # Capture the file name (last segment after the slash) — relative or absolute.
68
+ _HREF_PATTERN = re.compile(
69
+ r'<a\s+href="([^"]+)"',
70
+ re.IGNORECASE,
71
+ )
72
+
73
+
74
class AEMOAPIError(Exception):
    """A NEMWEB fetch failed: non-2xx response or a request-level error."""
76
+
77
+
78
class AEMOClient:
    """Async NEMWEB fetcher with caching and in-flight request sharing.

    Responses are stored in `cache`. Concurrent calls for the same URL share
    a single HTTP request through a per-URL future kept in `_in_flight`.
    """

    def __init__(
        self,
        cache: Cache | None = None,
        base_url: str = DEFAULT_BASE_URL,
        transport: httpx.AsyncBaseTransport | None = None,
        user_agent: str | None = None,
    ) -> None:
        """Build the HTTP client and the in-flight bookkeeping.

        Args:
            cache: Cache to use; a default `Cache()` is created when omitted.
            base_url: Server root; any trailing slash is stripped.
            transport: Optional httpx transport passed to the HTTP client.
            user_agent: Overrides the default User-Agent header.
        """
        self.base_url = base_url.rstrip("/")
        self.cache = cache or Cache()
        ua = user_agent or "aemo-mcp/0.1 (+https://github.com/Bigred97/aemo-mcp)"
        self._http = httpx.AsyncClient(
            timeout=DEFAULT_TIMEOUT,
            transport=transport,
            headers={"User-Agent": ua},
            follow_redirects=True,
        )
        # Each future resolves to (payload, stale_reason). stale_reason is
        # None for a fresh fetch, so callers that piggy-back on a request can
        # tell when the shared result came from the stale fallback.
        self._in_flight: dict[str, asyncio.Future[tuple[bytes, str | None]]] = {}
        self._in_flight_lock = asyncio.Lock()

    async def aclose(self) -> None:
        """Close the underlying HTTP client."""
        await self._http.aclose()

    async def __aenter__(self) -> "AEMOClient":
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.aclose()

    @staticmethod
    def _normalise_folder(folder: str) -> str:
        """Return `folder` with exactly one leading and one trailing slash added if missing."""
        if not folder.startswith("/"):
            folder = "/" + folder
        if not folder.endswith("/"):
            folder = folder + "/"
        return folder

    async def _fetch_cached(self, url: str, *, kind: CacheKind) -> bytes:
        """Generic cached + in-flight-deduped fetch.

        Concurrent callers for the same URL share one in-flight HTTP request.
        When the request fails and an older cached payload exists, that
        payload is returned and the stale signal is marked — for the caller
        that issued the request and for every caller that was waiting on it.

        Raises:
            AEMOAPIError: The request failed and nothing is cached for `url`.
        """
        cached = await self.cache.get(url, ttl=TTL[kind])
        if cached is not None:
            return cached

        # Race-safe in-flight registration.
        async with self._in_flight_lock:
            existing = self._in_flight.get(url)
            if existing is None:
                future: asyncio.Future[tuple[bytes, str | None]] = (
                    asyncio.get_running_loop().create_future()
                )
                self._in_flight[url] = future

        if existing is not None:
            payload, stale_reason = await existing
            # The stale signal is a ContextVar, so the marking done by the
            # caller that issued the request is not visible from here.
            # Repeat it so this caller also reports the payload as stale.
            if stale_reason is not None:
                _mark_stale(stale_reason)
            return payload

        try:
            try:
                resp = await self._http.get(url)
                resp.raise_for_status()
            except (httpx.HTTPStatusError, httpx.RequestError) as e:
                # Graceful degradation: when NEMWEB is unreachable, fall back
                # to the most-recent cached payload (regardless of TTL) rather
                # than raising.
                fallback = await self.cache.get_stale(url)
                if fallback is not None:
                    payload, cached_at = fallback
                    age_min = max(0, int((time.time() - cached_at) / 60))
                    if isinstance(e, httpx.HTTPStatusError):
                        upstream = (
                            f"AEMO/OpenNEM fetch returned "
                            f"{e.response.status_code}"
                        )
                    else:
                        upstream = (
                            f"AEMO/OpenNEM fetch unreachable "
                            f"({type(e).__name__})"
                        )
                    reason = (
                        f"{upstream} for {url}; serving cached payload from "
                        f"~{age_min} minute(s) ago"
                    )
                    _mark_stale(reason)
                    future.set_result((payload, reason))
                    return payload
                # Genuinely no cache to fall back to — preserve original behaviour
                if isinstance(e, httpx.HTTPStatusError):
                    raise AEMOAPIError(
                        f"NEMWEB returned {e.response.status_code} for {url}"
                    ) from e
                raise AEMOAPIError(f"NEMWEB request failed: {e}") from e
            await self.cache.set(url, resp.content, kind=kind)
            future.set_result((resp.content, None))
            return resp.content
        except BaseException as e:
            if not future.done():
                future.set_exception(e)
                # When no other coroutine is awaiting this future, the
                # exception would go unretrieved and Python would log
                # "Future exception was never retrieved". Reading it here
                # marks it retrieved; the `raise` below still propagates the
                # error to this caller.
                future.exception()
            raise
        finally:
            async with self._in_flight_lock:
                self._in_flight.pop(url, None)

    async def fetch_directory_listing(
        self,
        folder: str,
        *,
        filename_regex: re.Pattern[str] | None = None,
        kind: CacheKind = "listing",
    ) -> list[str]:
        """Fetch a NEMWEB folder's HTML listing and return sorted file names.

        `folder` is a path under `base_url`, e.g.
        '/Reports/Current/DispatchIS_Reports/'. Leading and trailing slashes
        are optional.

        If `filename_regex` is provided, only file names that fully match it
        are returned. The list is sorted ascending.
        """
        url = f"{self.base_url}{self._normalise_folder(folder)}"
        body = await self._fetch_cached(url, kind=kind)
        text = body.decode("utf-8", errors="replace")

        names: list[str] = []
        for match in _HREF_PATTERN.finditer(text):
            # Normalise: drop everything up to the final slash.
            name = match.group(1).rsplit("/", 1)[-1]
            if not name or name in (".", ".."):
                continue
            if filename_regex is not None and not filename_regex.fullmatch(name):
                continue
            names.append(name)
        names.sort()
        return names

    async def fetch_zip(self, url: str, *, kind: CacheKind = "archive") -> bytes:
        """Fetch a single NEMWEB ZIP file and return its raw bytes.

        Cached with the `archive` TTL by default; pass another `kind` for
        feeds where files might be republished.
        """
        return await self._fetch_cached(url, kind=kind)

    def build_url(self, folder: str, filename: str) -> str:
        """Join `base_url`, the normalised `folder` and `filename` into one URL."""
        return f"{self.base_url}{self._normalise_folder(folder)}{filename}"
aemo_mcp/curated.py ADDED
@@ -0,0 +1,226 @@
1
+ """Hand-curated metadata for the 7 NEM datasets.
2
+
3
+ Each YAML in `data/curated/` describes one dataset:
4
+ - backend folder + filename pattern (regex)
5
+ - multi-section CSV section name(s) to extract
6
+ - filter dimensions (region, interconnector, metric, ...)
7
+ - cadence + cache TTL kind
8
+ - units + plain-English search keywords
9
+
10
+ Loaded once at import time and stored in a frozen dataclass registry.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ from dataclasses import dataclass, field
16
+ from importlib import resources
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ import yaml
21
+
22
+ from .cache import CacheKind
23
+ from .models import DatasetDetail, DatasetFilter
24
+
25
+
26
@dataclass(frozen=True)
class CuratedFilter:
    """One filter dimension a curated dataset exposes to callers."""

    key: str
    description: str
    # Allowed values for the filter; empty when the YAML lists none.
    values: tuple[str, ...] = ()
    required: bool = False
    # CSV column named by the YAML `column` entry, if any. Presumably the
    # column the filter value is matched against — confirm against the code
    # that applies filters.
    column: str | None = None
35
+
36
+
37
@dataclass(frozen=True)
class CuratedMetric:
    """A single numeric metric read from a CSV section."""

    # Key used on the response side (e.g. "rrp").
    key: str
    # Header of the CSV column the metric is read from.
    source_column: str
    # Plain-English explanation of the metric.
    description: str
    # Unit label (e.g. "$/MWh").
    unit: str
44
+
45
+
46
@dataclass(frozen=True)
class CuratedSection:
    """A CSV section to pull out of a fetched ZIP."""

    # Section name, e.g. "DISPATCH.PRICE".
    name: str
    # Label that tells sections apart in the output dimensions when one
    # dataset stitches several sections together (e.g. `rooftop_pv`
    # combining ACTUAL + FORECAST from two different folders).
    discriminator: str | None = None
54
+
55
+
56
@dataclass(frozen=True)
class CuratedFolder:
    """A NEMWEB folder that a dataset fetches from."""

    # Folder path, e.g. "/Reports/Current/DispatchIS_Reports/".
    path: str
    # Regex source for file names in the folder, e.g. "PUBLIC_DISPATCHIS_.*\\.zip".
    filename_regex: str
    # Sections to extract from files in this folder.
    sections: tuple[CuratedSection, ...] = ()
    # Same role as CuratedSection.discriminator, at folder level.
    discriminator: str | None = None
63
+
64
+
65
@dataclass(frozen=True)
class CuratedDataset:
    """Curated metadata for one dataset, as loaded from its YAML file."""

    id: str
    name: str
    description: str
    # Plain-English cadence: "5 min" / "30 min" / "Daily".
    cadence: str
    cache_kind: CacheKind
    folders: tuple[CuratedFolder, ...]
    filters: tuple[CuratedFilter, ...] = ()
    metrics: tuple[CuratedMetric, ...] = ()
    # AEMO-standard period column.
    settlement_column: str = "SETTLEMENTDATE"
    source_url: str = "http://nemweb.com.au/Reports/Current/"
    search_keywords: tuple[str, ...] = ()
    examples: tuple[str, ...] = ()

    def to_detail(self) -> DatasetDetail:
        """Convert this entry into a `DatasetDetail` response model."""
        detail_filters = []
        for flt in self.filters:
            detail_filters.append(
                DatasetFilter(
                    key=flt.key,
                    description=flt.description,
                    values=list(flt.values),
                    required=flt.required,
                )
            )
        unit_by_metric = {metric.key: metric.unit for metric in self.metrics}
        return DatasetDetail(
            id=self.id,
            name=self.name,
            description=self.description,
            is_curated=True,
            cadence=self.cadence,
            filters=detail_filters,
            units=unit_by_metric,
            source_url=self.source_url,
            examples=list(self.examples),
        )

    def get_filter(self, key: str) -> CuratedFilter | None:
        """Return the first filter whose key equals `key`, or None."""
        return next((flt for flt in self.filters if flt.key == key), None)

    def get_metric(self, key: str) -> CuratedMetric | None:
        """Return the first metric whose key equals `key`, or None."""
        return next((metric for metric in self.metrics if metric.key == key), None)
112
+
113
+
114
+ _REGISTRY: dict[str, CuratedDataset] | None = None
115
+
116
+
117
def _yaml_dir() -> Path:
    """Locate the directory holding the curated YAML files.

    Tries the installed package's `data/curated` resource first, then a
    `data/curated` folder next to this source file.

    Raises:
        FileNotFoundError: Neither location is a directory.
    """
    try:
        packaged = resources.files("aemo_mcp").joinpath("data/curated")
        if packaged.is_dir():
            return Path(str(packaged))
    except (ModuleNotFoundError, AttributeError):
        pass

    beside_source = Path(__file__).resolve().parent.joinpath("data", "curated")
    if not beside_source.is_dir():
        raise FileNotFoundError("Could not locate aemo_mcp/data/curated/")
    return beside_source
128
+
129
+
130
def _parse_filter(raw: dict[str, Any]) -> CuratedFilter:
    """Build a `CuratedFilter` from one YAML filter mapping.

    `key` is mandatory; every other entry falls back to an empty or falsy
    default.
    """
    key = str(raw["key"])
    description = str(raw.get("description", ""))
    allowed_values = tuple(raw.get("values") or ())
    is_required = bool(raw.get("required", False))
    return CuratedFilter(
        key=key,
        description=description,
        values=allowed_values,
        required=is_required,
        column=raw.get("column"),
    )
138
+
139
+
140
def _parse_metric(raw: dict[str, Any]) -> CuratedMetric:
    """Build a `CuratedMetric` from one YAML metric mapping.

    `key` and `source_column` are mandatory; `description` and `unit`
    default to the empty string.
    """
    key = str(raw["key"])
    column = str(raw["source_column"])
    return CuratedMetric(
        key=key,
        source_column=column,
        description=str(raw.get("description", "")),
        unit=str(raw.get("unit", "")),
    )
147
+
148
+
149
def _parse_section(raw: dict[str, Any] | str) -> CuratedSection:
    """Build a `CuratedSection` from a YAML entry.

    A bare string is taken as the section name; a mapping supplies `name`
    and, optionally, `discriminator`.
    """
    if not isinstance(raw, str):
        return CuratedSection(
            name=str(raw["name"]),
            discriminator=raw.get("discriminator"),
        )
    return CuratedSection(name=raw)
156
+
157
+
158
def _parse_folder(raw: dict[str, Any]) -> CuratedFolder:
    """Build a `CuratedFolder` from one YAML folder mapping.

    `path` and `filename_regex` are mandatory; `sections` may be absent or
    empty.
    """
    parsed_sections = tuple(map(_parse_section, raw.get("sections") or []))
    return CuratedFolder(
        path=str(raw["path"]),
        filename_regex=str(raw["filename_regex"]),
        sections=parsed_sections,
        discriminator=raw.get("discriminator"),
    )
167
+
168
+
169
def _load_one(path: Path) -> CuratedDataset:
    """Load and validate one curated dataset YAML file.

    Args:
        path: Location of the YAML file.

    Returns:
        The parsed `CuratedDataset`.

    Raises:
        ValueError: The document is not a mapping (e.g. an empty file), it
            declares no folders, or `cache_kind` is not one of the values
            allowed by `CacheKind`.
        KeyError: A mandatory entry such as `id` or `name` is missing.
    """
    from typing import get_args

    raw = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load returns None for an empty document and may return a list or
    # scalar; fail with the file name rather than an AttributeError on `.get`.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Curated YAML {path.name} must contain a mapping at the top level"
        )

    folders = tuple(_parse_folder(f) for f in (raw.get("folders") or []))
    if not folders:
        raise ValueError(f"Curated YAML {path.name} has no folders")

    # Reject unknown kinds at load time: a typo would otherwise be stored as-is
    # and fail later wherever the kind is looked up.
    cache_kind = str(raw.get("cache_kind", "live"))
    allowed_kinds = get_args(CacheKind)
    if cache_kind not in allowed_kinds:
        raise ValueError(
            f"Curated YAML {path.name} has unknown cache_kind {cache_kind!r}; "
            f"expected one of {', '.join(allowed_kinds)}"
        )

    filters = tuple(_parse_filter(f) for f in (raw.get("filters") or []))
    metrics = tuple(_parse_metric(m) for m in (raw.get("metrics") or []))
    return CuratedDataset(
        id=str(raw["id"]),
        name=str(raw["name"]),
        description=str(raw.get("description", "")),
        cadence=str(raw.get("cadence", "")),
        cache_kind=cache_kind,  # type: ignore[arg-type]
        folders=folders,
        filters=filters,
        metrics=metrics,
        settlement_column=str(raw.get("settlement_column", "SETTLEMENTDATE")),
        source_url=str(raw.get("source_url", "http://nemweb.com.au/Reports/Current/")),
        search_keywords=tuple(raw.get("search_keywords") or ()),
        examples=tuple(raw.get("examples") or ()),
    )
190
+
191
+
192
def _load_all() -> dict[str, CuratedDataset]:
    """Load every `*.yaml` in the curated directory, keyed by dataset id.

    Files are read in sorted path order; if two files declare the same id,
    the later one replaces the earlier.
    """
    yaml_paths = sorted(_yaml_dir().glob("*.yaml"))
    datasets = (_load_one(yaml_path) for yaml_path in yaml_paths)
    return {dataset.id: dataset for dataset in datasets}
198
+
199
+
200
def _registry() -> dict[str, CuratedDataset]:
    """Return the dataset registry, loading it on first use."""
    global _REGISTRY
    if _REGISTRY is not None:
        return _REGISTRY
    _REGISTRY = _load_all()
    return _REGISTRY
205
+
206
+
207
def get(dataset_id: str) -> CuratedDataset | None:
    """Look up a dataset by id, ignoring surrounding whitespace and letter case.

    Returns None when no dataset is registered under the normalised id.
    """
    registry = _registry()
    lookup_key = dataset_id.strip().lower()
    return registry.get(lookup_key)
209
+
210
+
211
def list_ids() -> list[str]:
    """Return every registered dataset id in ascending order."""
    return sorted(_registry())
213
+
214
+
215
def list_all() -> list[CuratedDataset]:
    """Return every registered dataset, ordered by id."""
    ordered: list[CuratedDataset] = []
    for dataset_id in list_ids():
        ordered.append(_registry()[dataset_id])
    return ordered
217
+
218
+
219
def reset_registry() -> None:
    """Forget the loaded registry so the next lookup reloads the YAML files."""
    global _REGISTRY
    _REGISTRY = None
222
+
223
+
224
def compile_filename_regex(folder: CuratedFolder) -> re.Pattern[str]:
    """Compile `folder.filename_regex` into a pattern object.

    The pattern is compiled on every call; it is not stored on the folder
    because the dataclass is frozen.
    """
    pattern_source = folder.filename_regex
    return re.compile(pattern_source)
@@ -0,0 +1,68 @@
1
+ id: daily_summary
2
+ name: NEM Daily Summary (settled, prior trading day)
3
+ description: |
4
+ Daily rolled-up summary of the previous trading day's dispatch — settled
5
+ prices and demand by region. Source: PUBLIC_DAILY / DREGION.
6
+
7
+ AEMO publishes a single compendium ZIP shortly after 04:10 AEST each day
8
+ covering yesterday's full data, including post-AP (after-price) revisions.
9
+ Each row is one 5-min interval × region for the prior trading day, so a
10
+ single response has 288 intervals × 5 regions = 1,440 rows by default.
11
+ Use this for backfill and "weekly average dispatch price for VIC, last 4
12
+ weeks" type questions where you want settled values rather than the
13
+ preliminary dispatch values served by `dispatch_price`.
14
+ cadence: Daily
15
+ cache_kind: daily
16
+ settlement_column: SETTLEMENTDATE
17
+ source_url: http://nemweb.com.au/Reports/Current/Daily_Reports/
18
+
19
+ folders:
20
+ - path: /Reports/Current/Daily_Reports/
21
+ filename_regex: "PUBLIC_DAILY_\\d{12}_\\d{14}\\.zip"
22
+ sections:
23
+ # AEMO emits the daily region rollup as I,DREGION,,2,... — the
24
+ # second cell is empty, so the parser builds the section name
25
+ # "DREGION." (trailing dot). DREGION. is repeated v2 + v3 in the
26
+ # same file for old vs new schema; we read both and dedupe by
27
+ # (REGIONID, SETTLEMENTDATE) downstream.
28
+ - name: DREGION.
29
+
30
+ filters:
31
+ - key: region
32
+ description: NEM region — NSW1, QLD1, SA1, TAS1, VIC1.
33
+ values: [NSW1, QLD1, SA1, TAS1, VIC1]
34
+ column: REGIONID
35
+
36
+ metrics:
37
+ - key: rrp
38
+ source_column: RRP
39
+ description: Settled regional reference price ($/MWh)
40
+ unit: $/MWh
41
+ - key: total_demand
42
+ source_column: TOTALDEMAND
43
+ description: Total demand at the interval (MW)
44
+ unit: MW
45
+ - key: dispatchable_generation
46
+ source_column: DISPATCHABLEGENERATION
47
+ description: Dispatchable generation (scheduled + semi-scheduled, MW)
48
+ unit: MW
49
+ - key: net_interchange
50
+ source_column: NETINTERCHANGE
51
+ description: Net interconnector flow (MW). Positive = export.
52
+ unit: MW
53
+
54
+ search_keywords:
55
+ - daily summary
56
+ - daily report
57
+ - yesterday
58
+ - settled price
59
+ - settlement
60
+ - post ap
61
+ - daily aggregate
62
+ - daily price
63
+ - settled rrp
64
+ - daily settlement
65
+
66
+ examples:
67
+ - 'latest(dataset_id="daily_summary", filters={"region": "NSW1"}) # yesterday''s settled NSW prices'
68
+ - 'get_data(dataset_id="daily_summary", filters={"region": "VIC1"}, start_period="2026-04-17", end_period="2026-05-13") # 4 weeks of VIC settled dispatch'