PyPI - publicsgdata-mcp - Versions diffs - 0.2.0__tar.gz - Mend

publicsgdata-mcp 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

publicsgdata_mcp-0.2.0/.gitignore +21 -0
publicsgdata_mcp-0.2.0/CHANGELOG.md +21 -0
publicsgdata_mcp-0.2.0/PKG-INFO +57 -0
publicsgdata_mcp-0.2.0/README.md +45 -0
publicsgdata_mcp-0.2.0/pyproject.toml +29 -0
publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/__init__.py +3 -0
publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/cache.py +24 -0
publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/realtime_catalog.py +195 -0
publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/server.py +292 -0
publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/tools.py +150 -0
publicsgdata_mcp-0.2.0/tests/test_tools.py +153 -0

publicsgdata_mcp-0.2.0/.gitignore ADDED Viewed

@@ -0,0 +1,21 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+.venv/
+venv/
+env/
+dist/
+build/
+*.egg-info/
+.mypy_cache/
+.ruff_cache/
+.pytest_cache/
+.coverage
+coverage.xml
+htmlcov/
+*.log
+.DS_Store
+.idea/
+.vscode/

publicsgdata_mcp-0.2.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,21 @@
+# Changelog
+Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.2.0](https://github.com/harrytran001/publicsgdata/compare/publicsgdata-mcp-v0.1.0...publicsgdata-mcp-v0.2.0) (2026-06-09)
+### Features
+* add publicsgdata-mcp local stdio server ([d72a155](https://github.com/harrytran001/publicsgdata/commit/d72a155fde4d6634023065dc54025a396fbd9ffa))
+* generalize realtime MCP tools ([c9d2d36](https://github.com/harrytran001/publicsgdata/commit/c9d2d36104e7d9518b81de2278d448ea18990b51))
+* refactor packages + add mcp server ([272ad97](https://github.com/harrytran001/publicsgdata/commit/272ad97e809413fe07b9ac0a8c1d5ebcbbed8d56))
+## [Unreleased]
+## [0.1.0] - 2026-06-09
+### Added
+- Local stdio MCP server for data.gov.sg catalog preview, search, and full dataset download
+- Tools: `list_datasets`, `get_dataset_metadata`, `preview_dataset_rows`, `search_dataset_rows`, `get_dataset_download_url`, `download_dataset_file`, `list_realtime_datasets`, `describe_realtime_dataset`, `fetch_realtime_data`

publicsgdata_mcp-0.2.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,57 @@
+Metadata-Version: 2.4
+Name: publicsgdata-mcp
+Version: 0.2.0
+Summary: Local MCP server for Singapore government open data via publicsgdata
+Author: publicsgdata contributors
+License-Expression: MIT
+Keywords: ai,data.gov.sg,mcp,open-data,singapore
+Requires-Python: >=3.10
+Requires-Dist: mcp[cli]>=1.4
+Requires-Dist: publicsgdata
+Description-Content-Type: text/markdown
+# publicsgdata-mcp
+Local [Model Context Protocol](https://modelcontextprotocol.io/) server for exploring and downloading Singapore government open data through the `publicsgdata` SDK.
+## Cursor config
+Add to `.cursor/mcp.json`:
+```json
+{
+  "mcpServers": {
+    "publicsgdata": {
+      "type": "stdio",
+      "command": "uvx",
+      "args": ["publicsgdata-mcp"],
+      "env": {
+        "DATA_GOV_SG_API_KEY": "${env:DATA_GOV_SG_API_KEY}"
+      }
+    }
+  }
+}
+```
+## Tools
+| Tool | Purpose |
+|------|---------|
+| `list_datasets` | Browse the catalog |
+| `get_dataset_metadata` | Schema, size, coverage |
+| `preview_dataset_rows` | Small sample for inspection |
+| `search_dataset_rows` | Filter/search within a dataset |
+| `get_dataset_download_url` | Temporary URL for full export |
+| `download_dataset_file` | Save full dataset locally |
+| `list_realtime_datasets` | List supported realtime dataset names |
+| `describe_realtime_dataset` | Full parameter and response docs for one realtime dataset |
+| `fetch_realtime_data` | Fetch data from a realtime API by `dataset_name` |
+Downloads are cached under `~/.cache/publicsgdata-mcp/datasets/<dataset_id>/` by default. Set `PUBLICSGDATA_MCP_CACHE_DIR` to override the cache root.
+## Environment variables
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `DATA_GOV_SG_API_KEY` | No | Higher rate limits for data.gov.sg |
+| `PUBLICSGDATA_MCP_CACHE_DIR` | No | Override download cache directory |

publicsgdata_mcp-0.2.0/README.md ADDED Viewed

@@ -0,0 +1,45 @@
+# publicsgdata-mcp
+Local [Model Context Protocol](https://modelcontextprotocol.io/) server for exploring and downloading Singapore government open data through the `publicsgdata` SDK.
+## Cursor config
+Add to `.cursor/mcp.json`:
+```json
+{
+  "mcpServers": {
+    "publicsgdata": {
+      "type": "stdio",
+      "command": "uvx",
+      "args": ["publicsgdata-mcp"],
+      "env": {
+        "DATA_GOV_SG_API_KEY": "${env:DATA_GOV_SG_API_KEY}"
+      }
+    }
+  }
+}
+```
+## Tools
+| Tool | Purpose |
+|------|---------|
+| `list_datasets` | Browse the catalog |
+| `get_dataset_metadata` | Schema, size, coverage |
+| `preview_dataset_rows` | Small sample for inspection |
+| `search_dataset_rows` | Filter/search within a dataset |
+| `get_dataset_download_url` | Temporary URL for full export |
+| `download_dataset_file` | Save full dataset locally |
+| `list_realtime_datasets` | List supported realtime dataset names |
+| `describe_realtime_dataset` | Full parameter and response docs for one realtime dataset |
+| `fetch_realtime_data` | Fetch data from a realtime API by `dataset_name` |
+Downloads are cached under `~/.cache/publicsgdata-mcp/datasets/<dataset_id>/` by default. Set `PUBLICSGDATA_MCP_CACHE_DIR` to override the cache root.
+## Environment variables
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `DATA_GOV_SG_API_KEY` | No | Higher rate limits for data.gov.sg |
+| `PUBLICSGDATA_MCP_CACHE_DIR` | No | Override download cache directory |

publicsgdata_mcp-0.2.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,29 @@
+[project]
+name = "publicsgdata-mcp"
+version = "0.2.0"
+description = "Local MCP server for Singapore government open data via publicsgdata"
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.10"
+authors = [{ name = "publicsgdata contributors" }]
+keywords = ["singapore", "open-data", "data.gov.sg", "mcp", "ai"]
+dependencies = [
+    "mcp[cli]>=1.4",
+    "publicsgdata",
+]
+[project.scripts]
+publicsgdata-mcp = "publicsgdata_mcp.server:main"
+[build-system]
+requires = ["hatchling>=1.26"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/publicsgdata_mcp"]
+[tool.uv.sources]
+publicsgdata = { workspace = true }
+[tool.pytest.ini_options]
+testpaths = ["tests"]

publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""publicsgdata MCP server."""
+__version__ = "0.2.0"

publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/cache.py ADDED Viewed

@@ -0,0 +1,24 @@
+from __future__ import annotations
+import os
+from pathlib import Path
+ENV_CACHE_DIR = "PUBLICSGDATA_MCP_CACHE_DIR"
+def default_cache_dir() -> Path:
+    override = os.environ.get(ENV_CACHE_DIR)
+    if override:
+        return Path(override).expanduser()
+    return Path.home() / ".cache" / "publicsgdata-mcp"
+def dataset_cache_path(dataset_id: str, *, filename: str | None = None) -> Path:
+    cache_dir = default_cache_dir() / "datasets" / dataset_id
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    if filename:
+        safe_filename = Path(filename).name
+        if not safe_filename:
+            raise ValueError("filename must include a file name")
+        return cache_dir / safe_filename
+    return cache_dir / f"{dataset_id}.csv"

publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/realtime_catalog.py ADDED Viewed

@@ -0,0 +1,195 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+@dataclass(frozen=True)
+class RealtimeParameter:
+    """One query parameter accepted by a realtime API definition."""
+    # Query-string key; also the key callers must use in the parameters dict.
+    name: str
+    # Type label surfaced in the dataset description (e.g. "string").
+    type: str
+    # When True, normalize_realtime_parameters rejects requests that omit it.
+    required: bool
+    description: str
+    # Sample value; non-None examples are collected into example_request.
+    example: str | None = None
+@dataclass(frozen=True)
+class RealtimeResponseField:
+    """Documentation entry for one field of a realtime API response."""
+    name: str
+    # Type label shown to callers (e.g. "array", "string|null").
+    type: str
+    description: str
+@dataclass(frozen=True)
+class RealtimeApiDefinition:
+    """Catalog record describing one realtime API exposed through the MCP tools."""
+    # Canonical identifier; indexed case-insensitively by _index_apis.
+    dataset_name: str
+    title: str
+    summary: str
+    description: str
+    # Request path handed to the client when fetching (e.g. "/pm25").
+    path: str
+    # Host selector; tools.fetch_realtime_data only handles "v2_realtime".
+    host: str
+    update_frequency: str | None
+    parameters: tuple[RealtimeParameter, ...]
+    response_fields: tuple[RealtimeResponseField, ...]
+    response_notes: str
+    dataset_url: str | None = None
+    # Alternative names that resolve to this definition.
+    aliases: tuple[str, ...] = ()
+# Static catalog of the realtime APIs the MCP server knows about. Each entry
+# drives name/alias lookup, parameter validation, and the describe output.
+# NOTE(review): response field names below are snake_case (region_metadata,
+# pagination_token) while the test fixture payload uses "regionMetadata" —
+# confirm which casing callers actually receive.
+REALTIME_APIS: tuple[RealtimeApiDefinition, ...] = (
+    RealtimeApiDefinition(
+        dataset_name="air_quality_pm25_hourly_by_region",
+        title="PM2.5 hourly readings by region",
+        summary="Hourly PM2.5 air quality readings for Singapore regions from NEA.",
+        description=(
+            "Returns the latest or historical PM2.5 readings from the data.gov.sg v2 "
+            "real-time API. Readings are grouped by major regions such as north, south, "
+            "east, west, and central, with map label coordinates in region_metadata."
+        ),
+        path="/pm25",
+        host="v2_realtime",
+        update_frequency="Hourly",
+        parameters=(
+            RealtimeParameter(
+                name="date",
+                type="string",
+                required=False,
+                description=(
+                    "SGT date or datetime filter. Use YYYY-MM-DD for all readings on a day, "
+                    "or YYYY-MM-DDTHH:MM:SS for readings at a specific moment. "
+                    "Omit to fetch the latest reading."
+                ),
+                example="2024-07-16",
+            ),
+            RealtimeParameter(
+                name="paginationToken",
+                type="string",
+                required=False,
+                description=(
+                    "Pagination token from a previous response when requesting a full day "
+                    "or large historical range."
+                ),
+            ),
+        ),
+        response_fields=(
+            RealtimeResponseField(
+                name="region_metadata",
+                type="array",
+                description="Regions with name and labelLocation latitude/longitude for mapping.",
+            ),
+            RealtimeResponseField(
+                name="items",
+                type="array",
+                description=(
+                    "Reading snapshots. Each item includes date, timestamp, "
+                    "updatedTimestamp, and readings.pm25_one_hourly by region."
+                ),
+            ),
+            RealtimeResponseField(
+                name="pagination_token",
+                type="string|null",
+                description="Token for the next page when more historical readings exist.",
+            ),
+        ),
+        response_notes="Units are µg/m3. Latest reading is returned when date is omitted.",
+        dataset_url="https://data.gov.sg/datasets?formats=API",
+        aliases=("pm25", "pm2_5", "air_quality"),
+    ),
+)
+def _index_apis() -> dict[str, RealtimeApiDefinition]:
+    indexed: dict[str, RealtimeApiDefinition] = {}
+    for api in REALTIME_APIS:
+        indexed[api.dataset_name.lower()] = api
+        for alias in api.aliases:
+            indexed[alias.lower()] = api
+    return indexed
+_API_INDEX = _index_apis()
+def list_realtime_dataset_names() -> list[str]:
+    return [api.dataset_name for api in REALTIME_APIS]
+def resolve_realtime_api(dataset_name: str) -> RealtimeApiDefinition:
+    key = dataset_name.strip().lower()
+    api = _API_INDEX.get(key)
+    if api is None:
+        known = ", ".join(list_realtime_dataset_names())
+        raise ValueError(
+            f"Unknown realtime dataset_name {dataset_name!r}. Known datasets: {known}"
+        )
+    return api
+def describe_realtime_api(dataset_name: str) -> dict[str, Any]:
+    api = resolve_realtime_api(dataset_name)
+    return {
+        "dataset_name": api.dataset_name,
+        "title": api.title,
+        "summary": api.summary,
+        "description": api.description,
+        "host": api.host,
+        "path": api.path,
+        "update_frequency": api.update_frequency,
+        "dataset_url": api.dataset_url,
+        "aliases": list(api.aliases),
+        "parameters": [
+            {
+                "name": param.name,
+                "type": param.type,
+                "required": param.required,
+                "description": param.description,
+                "example": param.example,
+            }
+            for param in api.parameters
+        ],
+        "response_fields": [
+            {
+                "name": field.name,
+                "type": field.type,
+                "description": field.description,
+            }
+            for field in api.response_fields
+        ],
+        "response_notes": api.response_notes,
+        "example_request": {
+            "dataset_name": api.dataset_name,
+            "parameters": {
+                param.name: param.example
+                for param in api.parameters
+                if param.example is not None
+            },
+        },
+    }
+def normalize_realtime_parameters(
+    api: RealtimeApiDefinition,
+    parameters: dict[str, Any] | None,
+) -> dict[str, str]:
+    incoming = parameters or {}
+    allowed = {param.name for param in api.parameters}
+    unknown = sorted(set(incoming) - allowed)
+    if unknown:
+        raise ValueError(
+            f"Unknown parameters for {api.dataset_name}: {unknown}. "
+            f"Allowed: {sorted(allowed)}"
+        )
+    missing = [
+        param.name
+        for param in api.parameters
+        if param.required and param.name not in incoming
+    ]
+    if missing:
+        raise ValueError(f"Missing required parameters for {api.dataset_name}: {missing}")
+    encoded: dict[str, str] = {}
+    for key, value in incoming.items():
+        if value is not None:
+            encoded[key] = str(value)
+    return encoded

publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/server.py ADDED Viewed

@@ -0,0 +1,292 @@
+from __future__ import annotations
+from typing import Annotated, Any
+from mcp.server.fastmcp import FastMCP
+from pydantic import Field
+from publicsgdata_mcp import tools
+# Server instance; every function decorated with @mcp.tool() below registers on it.
+mcp = FastMCP("publicsgdata")
+# Shared annotated type so all dataset tools describe the dataset_id argument identically.
+DatasetId = Annotated[
+    str,
+    Field(
+        description=(
+            "data.gov.sg dataset ID (starts with d_). "
+            "Example: d_8b84c4ee58e3cfc0ece0d773c8ca6abc for HDB resale prices."
+        ),
+    ),
+]
+@mcp.tool()
+def list_datasets(
+    page: Annotated[
+        int | None,
+        Field(
+            description="Optional 1-based catalog page number. Omit to fetch the first page.",
+        ),
+    ] = None,
+) -> str:
+    """List datasets available on data.gov.sg.
+    Returns JSON:
+    - datasets (list): Catalog entries with dataset_id, name, format, status, coverage dates
+    - pages (int | null): Total catalog pages when paginated
+    """
+    return tools.list_datasets(page=page)
+# Tool wrapper: forwards dataset_id unchanged to tools.get_dataset_metadata,
+# which returns the SDK metadata serialized as a JSON string.
+@mcp.tool()
+def get_dataset_metadata(dataset_id: DatasetId) -> str:
+    """Get metadata for a dataset, including column definitions when available.
+    Returns JSON:
+    - dataset_id (str): Dataset identifier
+    - name (str): Dataset title
+    - description (str | null): Dataset summary
+    - format (str | null): File/API format, e.g. CSV
+    - dataset_size (int | null): Approximate row count
+    - coverage_start / coverage_end (str | null): Data time range
+    - column_metadata (object | null): Column names and types for interpreting rows
+    """
+    return tools.get_dataset_metadata(dataset_id)
+# Tool wrapper: the Field bounds (1..50) validate limit at the schema level, and
+# tools.preview_dataset_rows clamps it again to the same range before the SDK call.
+@mcp.tool()
+def preview_dataset_rows(
+    dataset_id: DatasetId,
+    limit: Annotated[
+        int,
+        Field(
+            description="Number of rows to preview. Capped at 50. Default 10.",
+            ge=1,
+            le=50,
+        ),
+    ] = 10,
+    cursor: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Pagination cursor from a previous response links.next value. "
+                "Omit on the first request."
+            ),
+        ),
+    ] = None,
+) -> str:
+    """Preview a bounded sample of rows from a dataset.
+    Use this before downloading the full file. Row keys match the dataset columns.
+    Returns JSON:
+    - dataset_id (str): Requested dataset ID
+    - dataset_name (str | null): Dataset title
+    - rows (list[object]): Sample records; each object is one row keyed by column name
+    - limit (int): Applied row limit
+    - links.next (str | null): Cursor for the next page, if more rows exist
+    """
+    return tools.preview_dataset_rows(dataset_id, limit=limit, cursor=cursor)
+# Tool wrapper: q, filters, sort and offset pass straight through to
+# tools.search_dataset_rows; limit is additionally clamped there to 1..50.
+@mcp.tool()
+def search_dataset_rows(
+    dataset_id: DatasetId,
+    q: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Full-text search query for CKAN datastore_search. "
+                "Use column values that make sense for the dataset."
+            ),
+        ),
+    ] = None,
+    filters: Annotated[
+        dict[str, Any] | None,
+        Field(
+            description=(
+                "Exact-match filters keyed by column name, e.g. {'town': 'ANG MO KIO'}. "
+                "Use column names from get_dataset_metadata."
+            ),
+        ),
+    ] = None,
+    sort: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Sort order for results, e.g. 'month desc'. "
+                "Use column names from get_dataset_metadata."
+            ),
+        ),
+    ] = None,
+    limit: Annotated[
+        int,
+        Field(
+            description="Maximum matching rows to return. Capped at 50. Default 20.",
+            ge=1,
+            le=50,
+        ),
+    ] = 20,
+    offset: Annotated[
+        int,
+        Field(
+            description="Number of matching rows to skip for pagination. Default 0.",
+            ge=0,
+        ),
+    ] = 0,
+) -> str:
+    """Search rows within a dataset using CKAN datastore search.
+    Returns JSON:
+    - resource_id (str): Dataset resource ID searched
+    - fields (list): Column id/type definitions for the records
+    - records (list[object]): Matching rows keyed by column name
+    - total (int): Total matches for the query
+    - limit (int): Applied page size
+    - offset (int): Applied offset
+    - links.next (str | null): Relative URL for the next page, if any
+    """
+    return tools.search_dataset_rows(
+        dataset_id,
+        q=q,
+        filters=filters,
+        sort=sort,
+        limit=limit,
+        offset=offset,
+    )
+# Tool wrapper: delegates to tools.get_dataset_download_url, which wraps the
+# SDK-provided URL together with the dataset_id in a JSON object.
+@mcp.tool()
+def get_dataset_download_url(
+    dataset_id: DatasetId,
+    skip_initiate: Annotated[
+        bool,
+        Field(
+            description=(
+                "Poll for an existing export without first requesting a CSV export. "
+                "Useful for non-CSV datasets such as GeoJSON or KML."
+            ),
+        ),
+    ] = False,
+) -> str:
+    """Get a temporary URL for the full dataset export.
+    Returns JSON:
+    - dataset_id (str): Requested dataset ID
+    - url (str): Temporary download URL valid for a short period
+    """
+    return tools.get_dataset_download_url(dataset_id, skip_initiate=skip_initiate)
+# Tool wrapper: tools.download_dataset_file resolves the destination through
+# cache.dataset_cache_path, which keeps only the base name of `filename`.
+@mcp.tool()
+def download_dataset_file(
+    dataset_id: DatasetId,
+    filename: Annotated[
+        str | None,
+        Field(
+            description=(
+                "Optional local filename under the MCP cache directory. "
+                "Defaults to {dataset_id}.csv."
+            ),
+        ),
+    ] = None,
+    skip_initiate: Annotated[
+        bool,
+        Field(
+            description=(
+                "Poll for an existing export without first requesting a CSV export. "
+                "Useful for non-CSV datasets such as GeoJSON or KML."
+            ),
+        ),
+    ] = False,
+) -> str:
+    """Download the full dataset to a local cache file for offline analysis.
+    Returns JSON:
+    - dataset_id (str): Requested dataset ID
+    - local_path (str): Absolute path to the downloaded file on this machine
+    - name (str): Dataset title
+    - format (str | null): Dataset format, e.g. CSV
+    """
+    return tools.download_dataset_file(
+        dataset_id,
+        filename=filename,
+        skip_initiate=skip_initiate,
+    )
+# Tool wrapper: returns the canonical names from the static realtime catalog;
+# no network request is involved.
+@mcp.tool()
+def list_realtime_datasets() -> str:
+    """List supported realtime dataset names.
+    Returns JSON: array of descriptive dataset_name strings.
+    """
+    return tools.list_realtime_datasets()
+# Tool wrapper: dataset_name may be a canonical name or a catalog alias; unknown
+# names surface as ValueError from the catalog lookup.
+@mcp.tool()
+def describe_realtime_dataset(
+    dataset_name: Annotated[
+        str,
+        Field(
+            description=(
+                "Descriptive realtime dataset name, "
+                "e.g. air_quality_pm25_hourly_by_region."
+            ),
+        ),
+    ],
+) -> str:
+    """Describe a realtime dataset's parameters and response fields.
+    Returns JSON:
+    - dataset_name (str): Canonical dataset identifier
+    - title, summary, description (str): Human-readable dataset documentation
+    - parameters (list): Allowed query parameters with types and descriptions
+    - response_fields (list): Top-level response fields and their meanings
+    - example_request (object): Example dataset_name and parameters
+    """
+    return tools.describe_realtime_dataset(dataset_name=dataset_name)
+# Tool wrapper: tools.fetch_realtime_data validates `parameters` against the
+# catalog entry (unknown keys raise ValueError) before issuing the request.
+@mcp.tool()
+def fetch_realtime_data(
+    dataset_name: Annotated[
+        str,
+        Field(
+            description=(
+                "Descriptive realtime dataset name, "
+                "e.g. air_quality_pm25_hourly_by_region."
+            ),
+        ),
+    ],
+    parameters: Annotated[
+        dict[str, Any] | None,
+        Field(
+            description=(
+                "Query parameters for the chosen realtime dataset. Keys must match "
+                "the parameter names returned by describe_realtime_dataset, such as date "
+                "or paginationToken."
+            ),
+        ),
+    ] = None,
+) -> str:
+    """Fetch data from a data.gov.sg realtime API.
+    Returns JSON:
+    - dataset_name (str): Resolved dataset identifier
+    - title (str): Human-readable dataset title
+    - parameters (object): Parameters sent to the API
+    - data (object): Raw realtime payload from data.gov.sg
+    """
+    return tools.fetch_realtime_data(dataset_name=dataset_name, parameters=parameters)
+def main() -> None:
+    """Run the MCP server over stdio and release the shared client on exit."""
+    try:
+        mcp.run(transport="stdio")
+    finally:
+        # Runs on normal return and on exceptions alike.
+        tools.close_client()
+if __name__ == "__main__":
+    main()

publicsgdata_mcp-0.2.0/src/publicsgdata_mcp/tools.py ADDED Viewed

@@ -0,0 +1,150 @@
+from __future__ import annotations
+import json
+from typing import Any
+from publicsgdata import DataGovSGClient
+from publicsgdata.datagovsg._request import DataGovSGHost
+from publicsgdata_mcp.cache import dataset_cache_path
+from publicsgdata_mcp.realtime_catalog import (
+    describe_realtime_api,
+    list_realtime_dataset_names,
+    normalize_realtime_parameters,
+    resolve_realtime_api,
+)
+# Upper bounds applied to the row limit in preview_dataset_rows / search_dataset_rows.
+MAX_PREVIEW_ROWS = 50
+MAX_SEARCH_ROWS = 50
+# Shared client: created lazily by get_client() and released by close_client().
+_client: DataGovSGClient | None = None
+def get_client() -> DataGovSGClient:
+    global _client
+    if _client is None:
+        _client = DataGovSGClient()
+    return _client
+def close_client() -> None:
+    global _client
+    if _client is not None:
+        _client.close()
+        _client = None
+def _json(data: Any) -> str:
+    if hasattr(data, "model_dump"):
+        return json.dumps(data.model_dump(), default=str)
+    return json.dumps(data, default=str)
+def list_datasets(page: int | None = None) -> str:
+    response = get_client().datasets.list(page=page)
+    return _json(response)
+def get_dataset_metadata(dataset_id: str) -> str:
+    metadata = get_client().datasets.get_metadata(dataset_id)
+    return _json(metadata)
+def preview_dataset_rows(
+    dataset_id: str,
+    *,
+    limit: int = 10,
+    cursor: str | None = None,
+) -> str:
+    bounded_limit = min(max(limit, 1), MAX_PREVIEW_ROWS)
+    rows = get_client().datasets.list_rows(dataset_id, limit=bounded_limit, cursor=cursor)
+    return _json(rows)
+def search_dataset_rows(
+    dataset_id: str,
+    *,
+    q: str | None = None,
+    filters: dict[str, Any] | None = None,
+    sort: str | None = None,
+    limit: int = 20,
+    offset: int = 0,
+) -> str:
+    bounded_limit = min(max(limit, 1), MAX_SEARCH_ROWS)
+    result = get_client().datasets.search(
+        dataset_id,
+        q=q,
+        filters=filters,
+        sort=sort,
+        limit=bounded_limit,
+        offset=offset,
+    )
+    return _json(result)
+def get_dataset_download_url(
+    dataset_id: str,
+    *,
+    skip_initiate: bool = False,
+) -> str:
+    url = get_client().datasets.get_download_url(dataset_id, skip_initiate=skip_initiate)
+    return _json({"dataset_id": dataset_id, "url": url})
+def download_dataset_file(
+    dataset_id: str,
+    *,
+    filename: str | None = None,
+    skip_initiate: bool = False,
+) -> str:
+    destination = dataset_cache_path(dataset_id, filename=filename)
+    path = get_client().datasets.download_file(
+        dataset_id,
+        destination,
+        skip_initiate=skip_initiate,
+    )
+    metadata = get_client().datasets.get_metadata(dataset_id)
+    return _json(
+        {
+            "dataset_id": dataset_id,
+            "local_path": str(path),
+            "name": metadata.name,
+            "format": metadata.format,
+        }
+    )
+def list_realtime_datasets() -> str:
+    return _json(list_realtime_dataset_names())
+def describe_realtime_dataset(dataset_name: str) -> str:
+    return _json(describe_realtime_api(dataset_name))
+def fetch_realtime_data(
+    dataset_name: str,
+    parameters: dict[str, Any] | None = None,
+) -> str:
+    api = resolve_realtime_api(dataset_name)
+    params = normalize_realtime_parameters(api, parameters)
+    client = get_client()
+    if api.host == "v2_realtime":
+        payload = client._request_json(
+            "GET",
+            DataGovSGHost.REALTIME,
+            api.path,
+            params=params or None,
+        )
+        data = client._realtime_data(payload)
+    else:
+        raise ValueError(f"Unsupported realtime host {api.host!r} for {api.dataset_name}")
+    return _json(
+        {
+            "dataset_name": api.dataset_name,
+            "title": api.title,
+            "parameters": parameters or {},
+            "data": data,
+        }
+    )

publicsgdata_mcp-0.2.0/tests/test_tools.py ADDED Viewed

@@ -0,0 +1,153 @@
+from __future__ import annotations
+import json
+from collections.abc import Generator
+from pathlib import Path
+from unittest.mock import patch
+import httpx
+import pytest
+from publicsgdata_mcp import tools
+@pytest.fixture(autouse=True)
+def reset_client() -> Generator[None, None, None]:
+    """Drop the module-level client before and after every test."""
+    tools.close_client()
+    yield
+    tools.close_client()
+def test_list_datasets() -> None:
+    fixture = {
+        "code": 0,
+        "data": {
+            "datasets": [
+                {
+                    "datasetId": "d_test",
+                    "name": "Test Dataset",
+                }
+            ]
+        },
+        "errorMsg": "",
+    }
+    def handler(request: httpx.Request) -> httpx.Response:
+        if request.url.path.endswith("/datasets"):
+            return httpx.Response(200, json=fixture)
+        return httpx.Response(404, json={"message": "not found"})
+    transport = httpx.MockTransport(handler)
+    with patch.object(tools, "get_client") as mock_get_client:
+        from publicsgdata import DataGovSGClient
+        client = DataGovSGClient(http_client=httpx.Client(transport=transport))
+        mock_get_client.return_value = client
+        payload = json.loads(tools.list_datasets())
+        assert payload["datasets"][0]["dataset_id"] == "d_test"
+        client.close()
+def test_download_dataset_file(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setenv("PUBLICSGDATA_MCP_CACHE_DIR", str(tmp_path))
+    initiate = {"code": 0, "data": {"message": "ok"}, "errorMsg": ""}
+    poll = {
+        "code": 0,
+        "data": {"status": "READY", "url": "https://example.com/data.csv"},
+        "errorMsg": "",
+    }
+    metadata = {
+        "code": 0,
+        "data": {
+            "datasetId": "d_test",
+            "name": "Test Dataset",
+            "format": "CSV",
+        },
+        "errorMsg": "",
+    }
+    def handler(request: httpx.Request) -> httpx.Response:
+        path = request.url.path
+        if path.endswith("/initiate-download"):
+            return httpx.Response(200, json=initiate)
+        if path.endswith("/poll-download"):
+            return httpx.Response(200, json=poll)
+        if path.endswith("/metadata"):
+            return httpx.Response(200, json=metadata)
+        if request.url.host == "example.com":
+            return httpx.Response(200, content=b"col\n1\n")
+        return httpx.Response(404, json={"message": "not found"})
+    transport = httpx.MockTransport(handler)
+    with patch.object(tools, "get_client") as mock_get_client:
+        from publicsgdata import DataGovSGClient
+        client = DataGovSGClient(http_client=httpx.Client(transport=transport))
+        mock_get_client.return_value = client
+        payload = json.loads(tools.download_dataset_file("d_test"))
+        assert payload["dataset_id"] == "d_test"
+        assert payload["local_path"].endswith("d_test.csv")
+        assert (tmp_path / "datasets" / "d_test" / "d_test.csv").exists()
+        client.close()
+def test_list_realtime_datasets() -> None:
+    """The realtime listing exposes exactly the one canonical catalog name."""
+    payload = json.loads(tools.list_realtime_datasets())
+    assert payload == ["air_quality_pm25_hourly_by_region"]
+def test_describe_realtime_dataset_one_api() -> None:
+    """The alias "pm25" resolves to the canonical dataset and lists its parameters."""
+    payload = json.loads(tools.describe_realtime_dataset("pm25"))
+    assert payload["dataset_name"] == "air_quality_pm25_hourly_by_region"
+    param_names = [param["name"] for param in payload["parameters"]]
+    assert "date" in param_names
+    assert "paginationToken" in param_names
+def test_fetch_realtime_data_pm25() -> None:
+    fixture = {
+        "code": 0,
+        "data": {
+            "regionMetadata": [
+                {
+                    "name": "central",
+                    "labelLocation": {"latitude": 1.35, "longitude": 103.82},
+                }
+            ],
+            "items": [
+                {
+                    "date": "2026-06-09",
+                    "timestamp": "2026-06-09T11:00:00+08:00",
+                    "readings": {"pm25_one_hourly": {"central": 17}},
+                }
+            ],
+        },
+        "errorMsg": "",
+    }
+    def handler(request: httpx.Request) -> httpx.Response:
+        if request.url.path.endswith("/pm25"):
+            return httpx.Response(200, json=fixture)
+        return httpx.Response(404, json={"message": "not found"})
+    transport = httpx.MockTransport(handler)
+    with patch.object(tools, "get_client") as mock_get_client:
+        from publicsgdata import DataGovSGClient
+        client = DataGovSGClient(http_client=httpx.Client(transport=transport))
+        mock_get_client.return_value = client
+        payload = json.loads(
+            tools.fetch_realtime_data("air_quality_pm25_hourly_by_region")
+        )
+        assert payload["dataset_name"] == "air_quality_pm25_hourly_by_region"
+        assert payload["data"]["items"][0]["readings"]["pm25_one_hourly"]["central"] == 17
+        client.close()
+def test_fetch_realtime_data_rejects_unknown_parameter() -> None:
+    """Undeclared parameter keys raise ValueError (no client is patched in here)."""
+    with pytest.raises(ValueError, match="Unknown parameters"):
+        tools.fetch_realtime_data(
+            "air_quality_pm25_hourly_by_region",
+            parameters={"bogus": "value"},
+        )