publicsgdata-mcp 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ .venv/
7
+ venv/
8
+ env/
9
+ dist/
10
+ build/
11
+ *.egg-info/
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .pytest_cache/
15
+ .coverage
16
+ coverage.xml
17
+ htmlcov/
18
+ *.log
19
+ .DS_Store
20
+ .idea/
21
+ .vscode/
@@ -0,0 +1,21 @@
1
+ # Changelog
2
+
3
+ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
4
+
5
+ ## [0.2.0](https://github.com/harrytran001/publicsgdata/compare/publicsgdata-mcp-v0.1.0...publicsgdata-mcp-v0.2.0) (2026-06-09)
6
+
7
+
8
+ ### Features
9
+
10
+ * add publicsgdata-mcp local stdio server ([d72a155](https://github.com/harrytran001/publicsgdata/commit/d72a155fde4d6634023065dc54025a396fbd9ffa))
11
+ * generalize realtime MCP tools ([c9d2d36](https://github.com/harrytran001/publicsgdata/commit/c9d2d36104e7d9518b81de2278d448ea18990b51))
12
+ * refactor packages + add mcp server ([272ad97](https://github.com/harrytran001/publicsgdata/commit/272ad97e809413fe07b9ac0a8c1d5ebcbbed8d56))
13
+
14
+ ## [Unreleased]
15
+
16
+ ## [0.1.0] - 2026-06-09
17
+
18
+ ### Added
19
+
20
+ - Local stdio MCP server for data.gov.sg catalog preview, search, and full dataset download
21
+ - Tools: `list_datasets`, `get_dataset_metadata`, `preview_dataset_rows`, `search_dataset_rows`, `get_dataset_download_url`, `download_dataset_file`, `list_realtime_datasets`, `describe_realtime_dataset`, `fetch_realtime_data`
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: publicsgdata-mcp
3
+ Version: 0.2.0
4
+ Summary: Local MCP server for Singapore government open data via publicsgdata
5
+ Author: publicsgdata contributors
6
+ License-Expression: MIT
7
+ Keywords: ai,data.gov.sg,mcp,open-data,singapore
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: mcp[cli]>=1.4
10
+ Requires-Dist: publicsgdata
11
+ Description-Content-Type: text/markdown
12
+
13
+ # publicsgdata-mcp
14
+
15
+ Local [Model Context Protocol](https://modelcontextprotocol.io/) server for exploring and downloading Singapore government open data through the `publicsgdata` SDK.
16
+
17
+ ## Cursor config
18
+
19
+ Add to `.cursor/mcp.json`:
20
+
21
+ ```json
22
+ {
23
+ "mcpServers": {
24
+ "publicsgdata": {
25
+ "type": "stdio",
26
+ "command": "uvx",
27
+ "args": ["publicsgdata-mcp"],
28
+ "env": {
29
+ "DATA_GOV_SG_API_KEY": "${env:DATA_GOV_SG_API_KEY}"
30
+ }
31
+ }
32
+ }
33
+ }
34
+ ```
35
+
36
+ ## Tools
37
+
38
+ | Tool | Purpose |
39
+ |------|---------|
40
+ | `list_datasets` | Browse the catalog |
41
+ | `get_dataset_metadata` | Schema, size, coverage |
42
+ | `preview_dataset_rows` | Small sample for inspection |
43
+ | `search_dataset_rows` | Filter/search within a dataset |
44
+ | `get_dataset_download_url` | Temporary URL for full export |
45
+ | `download_dataset_file` | Save full dataset locally |
46
+ | `list_realtime_datasets` | List supported realtime dataset names |
47
+ | `describe_realtime_dataset` | Full parameter and response docs for one realtime dataset |
48
+ | `fetch_realtime_data` | Fetch data from a realtime API by `dataset_name` |
49
+
50
+ Downloads are cached under `~/.cache/publicsgdata-mcp/datasets/<dataset_id>/` by default. Set `PUBLICSGDATA_MCP_CACHE_DIR` to use a different cache root.
51
+
52
+ ## Environment variables
53
+
54
+ | Variable | Required | Description |
55
+ |----------|----------|-------------|
56
+ | `DATA_GOV_SG_API_KEY` | No | Higher rate limits for data.gov.sg |
57
+ | `PUBLICSGDATA_MCP_CACHE_DIR` | No | Override download cache directory |
@@ -0,0 +1,45 @@
1
+ # publicsgdata-mcp
2
+
3
+ Local [Model Context Protocol](https://modelcontextprotocol.io/) server for exploring and downloading Singapore government open data through the `publicsgdata` SDK.
4
+
5
+ ## Cursor config
6
+
7
+ Add to `.cursor/mcp.json`:
8
+
9
+ ```json
10
+ {
11
+ "mcpServers": {
12
+ "publicsgdata": {
13
+ "type": "stdio",
14
+ "command": "uvx",
15
+ "args": ["publicsgdata-mcp"],
16
+ "env": {
17
+ "DATA_GOV_SG_API_KEY": "${env:DATA_GOV_SG_API_KEY}"
18
+ }
19
+ }
20
+ }
21
+ }
22
+ ```
23
+
24
+ ## Tools
25
+
26
+ | Tool | Purpose |
27
+ |------|---------|
28
+ | `list_datasets` | Browse the catalog |
29
+ | `get_dataset_metadata` | Schema, size, coverage |
30
+ | `preview_dataset_rows` | Small sample for inspection |
31
+ | `search_dataset_rows` | Filter/search within a dataset |
32
+ | `get_dataset_download_url` | Temporary URL for full export |
33
+ | `download_dataset_file` | Save full dataset locally |
34
+ | `list_realtime_datasets` | List supported realtime dataset names |
35
+ | `describe_realtime_dataset` | Full parameter and response docs for one realtime dataset |
36
+ | `fetch_realtime_data` | Fetch data from a realtime API by `dataset_name` |
37
+
38
+ Downloads are cached under `~/.cache/publicsgdata-mcp/datasets/<dataset_id>/` by default. Set `PUBLICSGDATA_MCP_CACHE_DIR` to use a different cache root.
39
+
40
+ ## Environment variables
41
+
42
+ | Variable | Required | Description |
43
+ |----------|----------|-------------|
44
+ | `DATA_GOV_SG_API_KEY` | No | Higher rate limits for data.gov.sg |
45
+ | `PUBLICSGDATA_MCP_CACHE_DIR` | No | Override download cache directory |
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "publicsgdata-mcp"
3
+ version = "0.2.0"
4
+ description = "Local MCP server for Singapore government open data via publicsgdata"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ authors = [{ name = "publicsgdata contributors" }]
9
+ keywords = ["singapore", "open-data", "data.gov.sg", "mcp", "ai"]
10
+ dependencies = [
11
+ "mcp[cli]>=1.4",
12
+ "publicsgdata",
13
+ ]
14
+
15
+ [project.scripts]
16
+ publicsgdata-mcp = "publicsgdata_mcp.server:main"
17
+
18
+ [build-system]
19
+ requires = ["hatchling>=1.26"]
20
+ build-backend = "hatchling.build"
21
+
22
+ [tool.hatch.build.targets.wheel]
23
+ packages = ["src/publicsgdata_mcp"]
24
+
25
+ [tool.uv.sources]
26
+ publicsgdata = { workspace = true }
27
+
28
+ [tool.pytest.ini_options]
29
+ testpaths = ["tests"]
@@ -0,0 +1,3 @@
1
+ """publicsgdata MCP server."""
2
+
3
+ __version__ = "0.2.0"
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
ENV_CACHE_DIR = "PUBLICSGDATA_MCP_CACHE_DIR"


def default_cache_dir() -> Path:
    """Return the root directory used for cached downloads.

    A non-empty ``PUBLICSGDATA_MCP_CACHE_DIR`` environment variable wins
    (with ``~`` expanded); otherwise the root is ``~/.cache/publicsgdata-mcp``.
    """
    override = os.environ.get(ENV_CACHE_DIR)
    if override:
        return Path(override).expanduser()
    return Path.home() / ".cache" / "publicsgdata-mcp"


def _require_single_component(value: str, label: str) -> str:
    """Return *value* unchanged if it is safe to use as one path component.

    Raises:
        ValueError: if *value* is empty, is ``.``/``..``, or contains a path
            separator or NUL byte.
    """
    if value in {"", ".", ".."} or any(ch in value for ch in ("/", "\\", "\x00")):
        raise ValueError(f"{label} must be a single path component, got {value!r}")
    return value


def dataset_cache_path(dataset_id: str, *, filename: str | None = None) -> Path:
    """Return the cache file path for a dataset, creating its directory.

    The file lives in ``<cache root>/datasets/<dataset_id>/``. When *filename*
    is given, only its final path component is used; otherwise the file is
    named ``<dataset_id>.csv``.

    Args:
        dataset_id: Dataset identifier; must be a single path component.
        filename: Optional file name for the download.

    Raises:
        ValueError: if *dataset_id* is not a single path component, or if
            *filename* has no usable final component (e.g. ``/`` or ``..``).
    """
    # dataset_id arrives from tool input, so it must not be able to steer the
    # path outside the cache root (e.g. "../x" or an absolute path).
    _require_single_component(dataset_id, "dataset_id")

    safe_filename: str | None = None
    if filename:
        safe_filename = Path(filename).name
        # Path("..").name is "..", which would resolve to the parent directory.
        if safe_filename in {"", ".", ".."}:
            raise ValueError("filename must include a file name")

    # Validate everything before touching the filesystem.
    cache_dir = default_cache_dir() / "datasets" / dataset_id
    cache_dir.mkdir(parents=True, exist_ok=True)
    if safe_filename is not None:
        return cache_dir / safe_filename
    return cache_dir / f"{dataset_id}.csv"
@@ -0,0 +1,195 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
@dataclass(frozen=True)
class RealtimeParameter:
    """One query parameter accepted by a realtime API definition."""

    # Query-string key; also the key callers use in the ``parameters`` dict.
    name: str
    # Human-readable type label shown in descriptions (e.g. "string").
    type: str
    # When True, normalize_realtime_parameters rejects requests that omit it.
    required: bool
    # Free-text usage guidance surfaced by describe_realtime_api.
    description: str
    # Sample value; when set it is included in the generated example_request.
    example: str | None = None
14
+
15
+
16
@dataclass(frozen=True)
class RealtimeResponseField:
    """Documentation entry for one field of a realtime API response."""

    # Field name as documented for the response payload.
    name: str
    # Human-readable type label (e.g. "array", "string|null").
    type: str
    # Free-text explanation of the field's contents.
    description: str
21
+
22
+
23
@dataclass(frozen=True)
class RealtimeApiDefinition:
    """Static catalog entry describing one supported realtime API."""

    # Canonical identifier returned by list_realtime_dataset_names.
    dataset_name: str
    # Short human-readable title.
    title: str
    # One-line summary of the dataset.
    summary: str
    # Longer description surfaced by describe_realtime_api.
    description: str
    # Request path on the selected host (e.g. "/pm25").
    path: str
    # Host selector label; fetch_realtime_data only supports "v2_realtime".
    host: str
    # Human-readable update cadence (e.g. "Hourly"), or None.
    update_frequency: str | None
    # Query parameters the API accepts; anything else is rejected.
    parameters: tuple[RealtimeParameter, ...]
    # Documented fields of the response.
    response_fields: tuple[RealtimeResponseField, ...]
    # Extra free-text notes about the response (units, defaults, ...).
    response_notes: str
    # Optional link to a page about the dataset.
    dataset_url: str | None = None
    # Alternative names that resolve to this entry (matched case-insensitively).
    aliases: tuple[str, ...] = ()
37
+
38
+
39
+ REALTIME_APIS: tuple[RealtimeApiDefinition, ...] = (
40
+ RealtimeApiDefinition(
41
+ dataset_name="air_quality_pm25_hourly_by_region",
42
+ title="PM2.5 hourly readings by region",
43
+ summary="Hourly PM2.5 air quality readings for Singapore regions from NEA.",
44
+ description=(
45
+ "Returns the latest or historical PM2.5 readings from the data.gov.sg v2 "
46
+ "real-time API. Readings are grouped by major regions such as north, south, "
47
+ "east, west, and central, with map label coordinates in region_metadata."
48
+ ),
49
+ path="/pm25",
50
+ host="v2_realtime",
51
+ update_frequency="Hourly",
52
+ parameters=(
53
+ RealtimeParameter(
54
+ name="date",
55
+ type="string",
56
+ required=False,
57
+ description=(
58
+ "SGT date or datetime filter. Use YYYY-MM-DD for all readings on a day, "
59
+ "or YYYY-MM-DDTHH:MM:SS for readings at a specific moment. "
60
+ "Omit to fetch the latest reading."
61
+ ),
62
+ example="2024-07-16",
63
+ ),
64
+ RealtimeParameter(
65
+ name="paginationToken",
66
+ type="string",
67
+ required=False,
68
+ description=(
69
+ "Pagination token from a previous response when requesting a full day "
70
+ "or large historical range."
71
+ ),
72
+ ),
73
+ ),
74
+ response_fields=(
75
+ RealtimeResponseField(
76
+ name="region_metadata",
77
+ type="array",
78
+ description="Regions with name and labelLocation latitude/longitude for mapping.",
79
+ ),
80
+ RealtimeResponseField(
81
+ name="items",
82
+ type="array",
83
+ description=(
84
+ "Reading snapshots. Each item includes date, timestamp, "
85
+ "updatedTimestamp, and readings.pm25_one_hourly by region."
86
+ ),
87
+ ),
88
+ RealtimeResponseField(
89
+ name="pagination_token",
90
+ type="string|null",
91
+ description="Token for the next page when more historical readings exist.",
92
+ ),
93
+ ),
94
+ response_notes="Units are µg/m3. Latest reading is returned when date is omitted.",
95
+ dataset_url="https://data.gov.sg/datasets?formats=API",
96
+ aliases=("pm25", "pm2_5", "air_quality"),
97
+ ),
98
+ )
99
+
100
+
101
def _index_apis() -> dict[str, RealtimeApiDefinition]:
    """Build the lowercase name/alias -> definition lookup table.

    Each definition is registered under its canonical ``dataset_name`` first
    and then under every alias; later registrations overwrite earlier ones.
    """
    lookup: dict[str, RealtimeApiDefinition] = {}
    for definition in REALTIME_APIS:
        for name in (definition.dataset_name, *definition.aliases):
            lookup[name.lower()] = definition
    return lookup
108
+
109
+
110
+ _API_INDEX = _index_apis()
111
+
112
+
113
def list_realtime_dataset_names() -> list[str]:
    """Return the canonical dataset names in catalog order (aliases excluded)."""
    names: list[str] = []
    for definition in REALTIME_APIS:
        names.append(definition.dataset_name)
    return names
115
+
116
+
117
def resolve_realtime_api(dataset_name: str) -> RealtimeApiDefinition:
    """Look up a realtime API definition by canonical name or alias.

    Matching ignores surrounding whitespace and letter case.

    Raises:
        ValueError: if the name is unknown; the message lists the canonical
            dataset names.
    """
    lookup_key = dataset_name.strip().lower()
    if lookup_key in _API_INDEX:
        return _API_INDEX[lookup_key]

    known = ", ".join(list_realtime_dataset_names())
    raise ValueError(
        f"Unknown realtime dataset_name {dataset_name!r}. Known datasets: {known}"
    )
126
+
127
+
128
def describe_realtime_api(dataset_name: str) -> dict[str, Any]:
    """Return a JSON-serializable description of one realtime API.

    The result contains the definition's metadata, its parameter and response
    field documentation, and an ``example_request`` built from every parameter
    that declares an example value.

    Raises:
        ValueError: if *dataset_name* does not resolve to a known definition.
    """
    api = resolve_realtime_api(dataset_name)

    parameter_docs: list[dict[str, Any]] = []
    example_parameters: dict[str, Any] = {}
    for param in api.parameters:
        parameter_docs.append(
            {
                "name": param.name,
                "type": param.type,
                "required": param.required,
                "description": param.description,
                "example": param.example,
            }
        )
        # Only parameters with a concrete example appear in the sample request.
        if param.example is not None:
            example_parameters[param.name] = param.example

    field_docs: list[dict[str, Any]] = []
    for field in api.response_fields:
        field_docs.append(
            {
                "name": field.name,
                "type": field.type,
                "description": field.description,
            }
        )

    example_request = {
        "dataset_name": api.dataset_name,
        "parameters": example_parameters,
    }

    return {
        "dataset_name": api.dataset_name,
        "title": api.title,
        "summary": api.summary,
        "description": api.description,
        "host": api.host,
        "path": api.path,
        "update_frequency": api.update_frequency,
        "dataset_url": api.dataset_url,
        "aliases": list(api.aliases),
        "parameters": parameter_docs,
        "response_fields": field_docs,
        "response_notes": api.response_notes,
        "example_request": example_request,
    }
168
+
169
+
170
def normalize_realtime_parameters(
    api: RealtimeApiDefinition,
    parameters: dict[str, Any] | None,
) -> dict[str, str]:
    """Validate caller parameters and encode them as query-string values.

    Args:
        api: Definition whose ``parameters`` declare the allowed names.
        parameters: Caller-supplied values; ``None`` means no parameters.

    Returns:
        A dict of the supplied parameters with every value converted via
        ``str``. Entries whose value is ``None`` are omitted.

    Raises:
        ValueError: if a key is not declared by *api*, or if a required
            parameter is absent or ``None``.
    """
    incoming = parameters or {}
    allowed = {param.name for param in api.parameters}
    unknown = sorted(set(incoming) - allowed)
    if unknown:
        raise ValueError(
            f"Unknown parameters for {api.dataset_name}: {unknown}. "
            f"Allowed: {sorted(allowed)}"
        )

    # None values are never sent, so treat them exactly like omitted keys.
    # Otherwise a required parameter passed as None would pass validation and
    # then silently vanish from the request.
    supplied = {key: value for key, value in incoming.items() if value is not None}

    missing = [
        param.name
        for param in api.parameters
        if param.required and param.name not in supplied
    ]
    if missing:
        raise ValueError(f"Missing required parameters for {api.dataset_name}: {missing}")

    return {key: str(value) for key, value in supplied.items()}
@@ -0,0 +1,292 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Annotated, Any
4
+
5
+ from mcp.server.fastmcp import FastMCP
6
+ from pydantic import Field
7
+
8
+ from publicsgdata_mcp import tools
9
+
10
+ mcp = FastMCP("publicsgdata")
11
+
12
+ DatasetId = Annotated[
13
+ str,
14
+ Field(
15
+ description=(
16
+ "data.gov.sg dataset ID (starts with d_). "
17
+ "Example: d_8b84c4ee58e3cfc0ece0d773c8ca6abc for HDB resale prices."
18
+ ),
19
+ ),
20
+ ]
21
+
22
+
23
+ @mcp.tool()
24
+ def list_datasets(
25
+ page: Annotated[
26
+ int | None,
27
+ Field(
28
+ description="Optional 1-based catalog page number. Omit to fetch the first page.",
29
+ ),
30
+ ] = None,
31
+ ) -> str:
32
+ """List datasets available on data.gov.sg.
33
+
34
+ Returns JSON:
35
+ - datasets (list): Catalog entries with dataset_id, name, format, status, coverage dates
36
+ - pages (int | null): Total catalog pages when paginated
37
+ """
38
+ return tools.list_datasets(page=page)
39
+
40
+
41
+ @mcp.tool()
42
+ def get_dataset_metadata(dataset_id: DatasetId) -> str:
43
+ """Get metadata for a dataset, including column definitions when available.
44
+
45
+ Returns JSON:
46
+ - dataset_id (str): Dataset identifier
47
+ - name (str): Dataset title
48
+ - description (str | null): Dataset summary
49
+ - format (str | null): File/API format, e.g. CSV
50
+ - dataset_size (int | null): Approximate row count
51
+ - coverage_start / coverage_end (str | null): Data time range
52
+ - column_metadata (object | null): Column names and types for interpreting rows
53
+ """
54
+ return tools.get_dataset_metadata(dataset_id)
55
+
56
+
57
+ @mcp.tool()
58
+ def preview_dataset_rows(
59
+ dataset_id: DatasetId,
60
+ limit: Annotated[
61
+ int,
62
+ Field(
63
+ description="Number of rows to preview. Capped at 50. Default 10.",
64
+ ge=1,
65
+ le=50,
66
+ ),
67
+ ] = 10,
68
+ cursor: Annotated[
69
+ str | None,
70
+ Field(
71
+ description=(
72
+ "Pagination cursor from a previous response links.next value. "
73
+ "Omit on the first request."
74
+ ),
75
+ ),
76
+ ] = None,
77
+ ) -> str:
78
+ """Preview a bounded sample of rows from a dataset.
79
+
80
+ Use this before downloading the full file. Row keys match the dataset columns.
81
+
82
+ Returns JSON:
83
+ - dataset_id (str): Requested dataset ID
84
+ - dataset_name (str | null): Dataset title
85
+ - rows (list[object]): Sample records; each object is one row keyed by column name
86
+ - limit (int): Applied row limit
87
+ - links.next (str | null): Cursor for the next page, if more rows exist
88
+ """
89
+ return tools.preview_dataset_rows(dataset_id, limit=limit, cursor=cursor)
90
+
91
+
92
+ @mcp.tool()
93
+ def search_dataset_rows(
94
+ dataset_id: DatasetId,
95
+ q: Annotated[
96
+ str | None,
97
+ Field(
98
+ description=(
99
+ "Full-text search query for CKAN datastore_search. "
100
+ "Use column values that make sense for the dataset."
101
+ ),
102
+ ),
103
+ ] = None,
104
+ filters: Annotated[
105
+ dict[str, Any] | None,
106
+ Field(
107
+ description=(
108
+ "Exact-match filters keyed by column name, e.g. {'town': 'ANG MO KIO'}. "
109
+ "Use column names from get_dataset_metadata."
110
+ ),
111
+ ),
112
+ ] = None,
113
+ sort: Annotated[
114
+ str | None,
115
+ Field(
116
+ description=(
117
+ "Sort order for results, e.g. 'month desc'. "
118
+ "Use column names from get_dataset_metadata."
119
+ ),
120
+ ),
121
+ ] = None,
122
+ limit: Annotated[
123
+ int,
124
+ Field(
125
+ description="Maximum matching rows to return. Capped at 50. Default 20.",
126
+ ge=1,
127
+ le=50,
128
+ ),
129
+ ] = 20,
130
+ offset: Annotated[
131
+ int,
132
+ Field(
133
+ description="Number of matching rows to skip for pagination. Default 0.",
134
+ ge=0,
135
+ ),
136
+ ] = 0,
137
+ ) -> str:
138
+ """Search rows within a dataset using CKAN datastore search.
139
+
140
+ Returns JSON:
141
+ - resource_id (str): Dataset resource ID searched
142
+ - fields (list): Column id/type definitions for the records
143
+ - records (list[object]): Matching rows keyed by column name
144
+ - total (int): Total matches for the query
145
+ - limit (int): Applied page size
146
+ - offset (int): Applied offset
147
+ - links.next (str | null): Relative URL for the next page, if any
148
+ """
149
+ return tools.search_dataset_rows(
150
+ dataset_id,
151
+ q=q,
152
+ filters=filters,
153
+ sort=sort,
154
+ limit=limit,
155
+ offset=offset,
156
+ )
157
+
158
+
159
+ @mcp.tool()
160
+ def get_dataset_download_url(
161
+ dataset_id: DatasetId,
162
+ skip_initiate: Annotated[
163
+ bool,
164
+ Field(
165
+ description=(
166
+ "Poll for an existing export without first requesting a CSV export. "
167
+ "Useful for non-CSV datasets such as GeoJSON or KML."
168
+ ),
169
+ ),
170
+ ] = False,
171
+ ) -> str:
172
+ """Get a temporary URL for the full dataset export.
173
+
174
+ Returns JSON:
175
+ - dataset_id (str): Requested dataset ID
176
+ - url (str): Temporary download URL valid for a short period
177
+ """
178
+ return tools.get_dataset_download_url(dataset_id, skip_initiate=skip_initiate)
179
+
180
+
181
+ @mcp.tool()
182
+ def download_dataset_file(
183
+ dataset_id: DatasetId,
184
+ filename: Annotated[
185
+ str | None,
186
+ Field(
187
+ description=(
188
+ "Optional local filename under the MCP cache directory. "
189
+ "Defaults to {dataset_id}.csv."
190
+ ),
191
+ ),
192
+ ] = None,
193
+ skip_initiate: Annotated[
194
+ bool,
195
+ Field(
196
+ description=(
197
+ "Poll for an existing export without first requesting a CSV export. "
198
+ "Useful for non-CSV datasets such as GeoJSON or KML."
199
+ ),
200
+ ),
201
+ ] = False,
202
+ ) -> str:
203
+ """Download the full dataset to a local cache file for offline analysis.
204
+
205
+ Returns JSON:
206
+ - dataset_id (str): Requested dataset ID
207
+ - local_path (str): Absolute path to the downloaded file on this machine
208
+ - name (str): Dataset title
209
+ - format (str | null): Dataset format, e.g. CSV
210
+ """
211
+ return tools.download_dataset_file(
212
+ dataset_id,
213
+ filename=filename,
214
+ skip_initiate=skip_initiate,
215
+ )
216
+
217
+
218
+ @mcp.tool()
219
+ def list_realtime_datasets() -> str:
220
+ """List supported realtime dataset names.
221
+
222
+ Returns JSON: array of descriptive dataset_name strings.
223
+ """
224
+ return tools.list_realtime_datasets()
225
+
226
+
227
+ @mcp.tool()
228
+ def describe_realtime_dataset(
229
+ dataset_name: Annotated[
230
+ str,
231
+ Field(
232
+ description=(
233
+ "Descriptive realtime dataset name, "
234
+ "e.g. air_quality_pm25_hourly_by_region."
235
+ ),
236
+ ),
237
+ ],
238
+ ) -> str:
239
+ """Describe a realtime dataset's parameters and response fields.
240
+
241
+ Returns JSON:
242
+ - dataset_name (str): Canonical dataset identifier
243
+ - title, summary, description (str): Human-readable dataset documentation
244
+ - parameters (list): Allowed query parameters with types and descriptions
245
+ - response_fields (list): Top-level response fields and their meanings
246
+ - example_request (object): Example dataset_name and parameters
247
+ """
248
+ return tools.describe_realtime_dataset(dataset_name=dataset_name)
249
+
250
+
251
+ @mcp.tool()
252
+ def fetch_realtime_data(
253
+ dataset_name: Annotated[
254
+ str,
255
+ Field(
256
+ description=(
257
+ "Descriptive realtime dataset name, "
258
+ "e.g. air_quality_pm25_hourly_by_region."
259
+ ),
260
+ ),
261
+ ],
262
+ parameters: Annotated[
263
+ dict[str, Any] | None,
264
+ Field(
265
+ description=(
266
+ "Query parameters for the chosen realtime dataset. Keys must match "
267
+ "the parameter names returned by describe_realtime_dataset, such as date "
268
+ "or paginationToken."
269
+ ),
270
+ ),
271
+ ] = None,
272
+ ) -> str:
273
+ """Fetch data from a data.gov.sg realtime API.
274
+
275
+ Returns JSON:
276
+ - dataset_name (str): Resolved dataset identifier
277
+ - title (str): Human-readable dataset title
278
+ - parameters (object): Parameters sent to the API
279
+ - data (object): Raw realtime payload from data.gov.sg
280
+ """
281
+ return tools.fetch_realtime_data(dataset_name=dataset_name, parameters=parameters)
282
+
283
+
284
+ def main() -> None:
285
+ try:
286
+ mcp.run(transport="stdio")
287
+ finally:
288
+ tools.close_client()
289
+
290
+
291
+ if __name__ == "__main__":
292
+ main()
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from typing import Any
5
+
6
+ from publicsgdata import DataGovSGClient
7
+ from publicsgdata.datagovsg._request import DataGovSGHost
8
+ from publicsgdata_mcp.cache import dataset_cache_path
9
+ from publicsgdata_mcp.realtime_catalog import (
10
+ describe_realtime_api,
11
+ list_realtime_dataset_names,
12
+ normalize_realtime_parameters,
13
+ resolve_realtime_api,
14
+ )
15
+
16
+ MAX_PREVIEW_ROWS = 50
17
+ MAX_SEARCH_ROWS = 50
18
+
19
+ _client: DataGovSGClient | None = None
20
+
21
+
22
def get_client() -> DataGovSGClient:
    """Return the module-wide client, constructing it on first use."""
    global _client
    if _client is not None:
        return _client
    _client = DataGovSGClient()
    return _client
27
+
28
+
29
def close_client() -> None:
    """Close and forget the module-wide client, if one exists.

    The cached reference is cleared before ``close()`` runs, so an error
    raised while closing still propagates but cannot leave a half-closed
    client cached for later ``get_client()`` calls.
    """
    global _client
    if _client is None:
        return
    client, _client = _client, None
    client.close()
34
+
35
+
36
+ def _json(data: Any) -> str:
37
+ if hasattr(data, "model_dump"):
38
+ return json.dumps(data.model_dump(), default=str)
39
+ return json.dumps(data, default=str)
40
+
41
+
42
def list_datasets(page: int | None = None) -> str:
    """Return one page of the dataset catalog as a JSON string."""
    return _json(get_client().datasets.list(page=page))
45
+
46
+
47
def get_dataset_metadata(dataset_id: str) -> str:
    """Return the metadata for *dataset_id* as a JSON string."""
    return _json(get_client().datasets.get_metadata(dataset_id))
50
+
51
+
52
def preview_dataset_rows(
    dataset_id: str,
    *,
    limit: int = 10,
    cursor: str | None = None,
) -> str:
    """Return a bounded sample of dataset rows as a JSON string.

    *limit* is clamped to the range 1..MAX_PREVIEW_ROWS before the request.
    """
    at_least_one = max(limit, 1)
    row_cap = min(at_least_one, MAX_PREVIEW_ROWS)
    datasets = get_client().datasets
    return _json(datasets.list_rows(dataset_id, limit=row_cap, cursor=cursor))
61
+
62
+
63
def search_dataset_rows(
    dataset_id: str,
    *,
    q: str | None = None,
    filters: dict[str, Any] | None = None,
    sort: str | None = None,
    limit: int = 20,
    offset: int = 0,
) -> str:
    """Search rows of a dataset and return the result as a JSON string.

    *limit* is clamped to the range 1..MAX_SEARCH_ROWS; all other arguments
    are forwarded to the client's dataset search unchanged.
    """
    at_least_one = max(limit, 1)
    row_cap = min(at_least_one, MAX_SEARCH_ROWS)
    datasets = get_client().datasets
    return _json(
        datasets.search(
            dataset_id,
            q=q,
            filters=filters,
            sort=sort,
            limit=row_cap,
            offset=offset,
        )
    )
82
+
83
+
84
def get_dataset_download_url(
    dataset_id: str,
    *,
    skip_initiate: bool = False,
) -> str:
    """Return a JSON string with the dataset ID and its download URL."""
    datasets = get_client().datasets
    result = {
        "dataset_id": dataset_id,
        "url": datasets.get_download_url(dataset_id, skip_initiate=skip_initiate),
    }
    return _json(result)
91
+
92
+
93
def download_dataset_file(
    dataset_id: str,
    *,
    filename: str | None = None,
    skip_initiate: bool = False,
) -> str:
    """Download a dataset into the cache and describe the result as JSON.

    The file is written to the path chosen by ``dataset_cache_path``; the
    dataset's metadata is fetched afterwards to report its name and format.
    """
    target = dataset_cache_path(dataset_id, filename=filename)
    saved_to = get_client().datasets.download_file(
        dataset_id,
        target,
        skip_initiate=skip_initiate,
    )
    info = get_client().datasets.get_metadata(dataset_id)
    summary = {
        "dataset_id": dataset_id,
        "local_path": str(saved_to),
        "name": info.name,
        "format": info.format,
    }
    return _json(summary)
114
+
115
+
116
def list_realtime_datasets() -> str:
    """Return the supported realtime dataset names as a JSON array string."""
    names = list_realtime_dataset_names()
    return _json(names)
118
+
119
+
120
def describe_realtime_dataset(dataset_name: str) -> str:
    """Return the description of one realtime dataset as a JSON string."""
    description = describe_realtime_api(dataset_name)
    return _json(description)
122
+
123
+
124
def fetch_realtime_data(
    dataset_name: str,
    parameters: dict[str, Any] | None = None,
) -> str:
    """Fetch data from a realtime API and return it as a JSON string.

    Args:
        dataset_name: Canonical realtime dataset name or one of its aliases.
        parameters: Optional query parameters; validated against the dataset's
            declared parameters before the request is made.

    Returns:
        JSON with ``dataset_name``, ``title``, ``parameters`` (the normalized
        values actually sent to the API) and ``data``.

    Raises:
        ValueError: if the dataset is unknown, the parameters are invalid, or
            the dataset's host is not supported.
    """
    api = resolve_realtime_api(dataset_name)
    params = normalize_realtime_parameters(api, parameters)

    # Reject unsupported hosts before creating or using the HTTP client.
    if api.host != "v2_realtime":
        raise ValueError(f"Unsupported realtime host {api.host!r} for {api.dataset_name}")

    client = get_client()
    # NOTE(review): relies on private DataGovSGClient helpers
    # (_request_json / _realtime_data); confirm they are stable across
    # publicsgdata releases.
    payload = client._request_json(
        "GET",
        DataGovSGHost.REALTIME,
        api.path,
        params=params or None,
    )
    data = client._realtime_data(payload)

    return _json(
        {
            "dataset_name": api.dataset_name,
            "title": api.title,
            # Echo what was really sent (None values dropped, values
            # stringified), not the raw caller input.
            "parameters": params,
            "data": data,
        }
    )
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Generator
5
+ from pathlib import Path
6
+ from unittest.mock import patch
7
+
8
+ import httpx
9
+ import pytest
10
+
11
+ from publicsgdata_mcp import tools
12
+
13
+
14
+ @pytest.fixture(autouse=True)
15
+ def reset_client() -> Generator[None, None, None]:
16
+ tools.close_client()
17
+ yield
18
+ tools.close_client()
19
+
20
+
21
+ def test_list_datasets() -> None:
22
+ fixture = {
23
+ "code": 0,
24
+ "data": {
25
+ "datasets": [
26
+ {
27
+ "datasetId": "d_test",
28
+ "name": "Test Dataset",
29
+ }
30
+ ]
31
+ },
32
+ "errorMsg": "",
33
+ }
34
+
35
+ def handler(request: httpx.Request) -> httpx.Response:
36
+ if request.url.path.endswith("/datasets"):
37
+ return httpx.Response(200, json=fixture)
38
+ return httpx.Response(404, json={"message": "not found"})
39
+
40
+ transport = httpx.MockTransport(handler)
41
+ with patch.object(tools, "get_client") as mock_get_client:
42
+ from publicsgdata import DataGovSGClient
43
+
44
+ client = DataGovSGClient(http_client=httpx.Client(transport=transport))
45
+ mock_get_client.return_value = client
46
+ payload = json.loads(tools.list_datasets())
47
+ assert payload["datasets"][0]["dataset_id"] == "d_test"
48
+ client.close()
49
+
50
+
51
+ def test_download_dataset_file(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
52
+ monkeypatch.setenv("PUBLICSGDATA_MCP_CACHE_DIR", str(tmp_path))
53
+
54
+ initiate = {"code": 0, "data": {"message": "ok"}, "errorMsg": ""}
55
+ poll = {
56
+ "code": 0,
57
+ "data": {"status": "READY", "url": "https://example.com/data.csv"},
58
+ "errorMsg": "",
59
+ }
60
+ metadata = {
61
+ "code": 0,
62
+ "data": {
63
+ "datasetId": "d_test",
64
+ "name": "Test Dataset",
65
+ "format": "CSV",
66
+ },
67
+ "errorMsg": "",
68
+ }
69
+
70
+ def handler(request: httpx.Request) -> httpx.Response:
71
+ path = request.url.path
72
+ if path.endswith("/initiate-download"):
73
+ return httpx.Response(200, json=initiate)
74
+ if path.endswith("/poll-download"):
75
+ return httpx.Response(200, json=poll)
76
+ if path.endswith("/metadata"):
77
+ return httpx.Response(200, json=metadata)
78
+ if request.url.host == "example.com":
79
+ return httpx.Response(200, content=b"col\n1\n")
80
+ return httpx.Response(404, json={"message": "not found"})
81
+
82
+ transport = httpx.MockTransport(handler)
83
+ with patch.object(tools, "get_client") as mock_get_client:
84
+ from publicsgdata import DataGovSGClient
85
+
86
+ client = DataGovSGClient(http_client=httpx.Client(transport=transport))
87
+ mock_get_client.return_value = client
88
+ payload = json.loads(tools.download_dataset_file("d_test"))
89
+ assert payload["dataset_id"] == "d_test"
90
+ assert payload["local_path"].endswith("d_test.csv")
91
+ assert (tmp_path / "datasets" / "d_test" / "d_test.csv").exists()
92
+ client.close()
93
+
94
+
95
+ def test_list_realtime_datasets() -> None:
96
+ payload = json.loads(tools.list_realtime_datasets())
97
+ assert payload == ["air_quality_pm25_hourly_by_region"]
98
+
99
+
100
+ def test_describe_realtime_dataset_one_api() -> None:
101
+ payload = json.loads(tools.describe_realtime_dataset("pm25"))
102
+ assert payload["dataset_name"] == "air_quality_pm25_hourly_by_region"
103
+ param_names = [param["name"] for param in payload["parameters"]]
104
+ assert "date" in param_names
105
+ assert "paginationToken" in param_names
106
+
107
+
108
+ def test_fetch_realtime_data_pm25() -> None:
109
+ fixture = {
110
+ "code": 0,
111
+ "data": {
112
+ "regionMetadata": [
113
+ {
114
+ "name": "central",
115
+ "labelLocation": {"latitude": 1.35, "longitude": 103.82},
116
+ }
117
+ ],
118
+ "items": [
119
+ {
120
+ "date": "2026-06-09",
121
+ "timestamp": "2026-06-09T11:00:00+08:00",
122
+ "readings": {"pm25_one_hourly": {"central": 17}},
123
+ }
124
+ ],
125
+ },
126
+ "errorMsg": "",
127
+ }
128
+
129
+ def handler(request: httpx.Request) -> httpx.Response:
130
+ if request.url.path.endswith("/pm25"):
131
+ return httpx.Response(200, json=fixture)
132
+ return httpx.Response(404, json={"message": "not found"})
133
+
134
+ transport = httpx.MockTransport(handler)
135
+ with patch.object(tools, "get_client") as mock_get_client:
136
+ from publicsgdata import DataGovSGClient
137
+
138
+ client = DataGovSGClient(http_client=httpx.Client(transport=transport))
139
+ mock_get_client.return_value = client
140
+ payload = json.loads(
141
+ tools.fetch_realtime_data("air_quality_pm25_hourly_by_region")
142
+ )
143
+ assert payload["dataset_name"] == "air_quality_pm25_hourly_by_region"
144
+ assert payload["data"]["items"][0]["readings"]["pm25_one_hourly"]["central"] == 17
145
+ client.close()
146
+
147
+
148
+ def test_fetch_realtime_data_rejects_unknown_parameter() -> None:
149
+ with pytest.raises(ValueError, match="Unknown parameters"):
150
+ tools.fetch_realtime_data(
151
+ "air_quality_pm25_hourly_by_region",
152
+ parameters={"bogus": "value"},
153
+ )