publicsgdata 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- publicsgdata/__init__.py +23 -0
- publicsgdata/_base_client.py +138 -0
- publicsgdata/_constants.py +17 -0
- publicsgdata/_exceptions.py +38 -0
- publicsgdata/_pagination.py +150 -0
- publicsgdata/datagovsg/__init__.py +6 -0
- publicsgdata/datagovsg/_request.py +65 -0
- publicsgdata/datagovsg/async_client.py +73 -0
- publicsgdata/datagovsg/client.py +73 -0
- publicsgdata/datagovsg/models/__init__.py +29 -0
- publicsgdata/datagovsg/models/common.py +141 -0
- publicsgdata/datagovsg/resources/collections.py +43 -0
- publicsgdata/datagovsg/resources/datasets.py +248 -0
- publicsgdata/datagovsg/resources/realtime/__init__.py +11 -0
- publicsgdata/datagovsg/resources/realtime/pm25.py +58 -0
- publicsgdata-0.1.0.dist-info/METADATA +130 -0
- publicsgdata-0.1.0.dist-info/RECORD +19 -0
- publicsgdata-0.1.0.dist-info/WHEEL +4 -0
- publicsgdata-0.1.0.dist-info/licenses/LICENSE +21 -0
publicsgdata/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""publicsgdata: Python client for Singapore government public data."""
|
|
2
|
+
|
|
3
|
+
from publicsgdata._exceptions import (
|
|
4
|
+
APIError,
|
|
5
|
+
AuthenticationError,
|
|
6
|
+
NotFoundError,
|
|
7
|
+
PublicSGDataError,
|
|
8
|
+
RateLimitError,
|
|
9
|
+
)
|
|
10
|
+
from publicsgdata.datagovsg.async_client import AsyncDataGovSGClient
|
|
11
|
+
from publicsgdata.datagovsg.client import DataGovSGClient
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"APIError",
|
|
15
|
+
"AsyncDataGovSGClient",
|
|
16
|
+
"AuthenticationError",
|
|
17
|
+
"DataGovSGClient",
|
|
18
|
+
"NotFoundError",
|
|
19
|
+
"PublicSGDataError",
|
|
20
|
+
"RateLimitError",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Any, Literal, cast
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from publicsgdata._constants import DEFAULT_TIMEOUT, HEADER_API_KEY
|
|
9
|
+
from publicsgdata._exceptions import (
|
|
10
|
+
APIError,
|
|
11
|
+
AuthenticationError,
|
|
12
|
+
NotFoundError,
|
|
13
|
+
RateLimitError,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseHTTPClient:
    """Shared HTTP helpers for sync and async clients.

    Keeps the settings common to both client flavours and turns error
    responses into the package's exception types.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = 0,
    ) -> None:
        """Store the connection settings.

        Args:
            api_key: Value sent in the API-key header; no header is sent
                when it is ``None`` or empty.
            timeout: Timeout value kept on the instance.
            max_retries: Retry budget kept on the instance.
        """
        self._api_key = api_key
        self._timeout = timeout
        # NOTE(review): nothing in this class reads _max_retries; confirm
        # whether retry handling is implemented elsewhere.
        self._max_retries = max_retries
        # False until a subclass creates an HTTP client that it must close.
        self._owns_client = False

    def _auth_headers(self) -> dict[str, str]:
        """Return the API-key header, or an empty dict when no key is set."""
        if self._api_key:
            return {HEADER_API_KEY: self._api_key}
        return {}

    def _merge_headers(self, headers: Mapping[str, str] | None = None) -> dict[str, str]:
        """Return the auth headers overlaid with ``headers`` (caller wins)."""
        merged = dict(self._auth_headers())
        if headers:
            merged.update(headers)
        return merged

    def _parse_json(self, response: httpx.Response) -> Any:
        """Decode the response body as JSON.

        Returns:
            ``None`` for an empty body, or for an error response whose body
            is not valid JSON; the decoded value otherwise.

        Raises:
            ValueError: If a successful response carries an undecodable body.
        """
        if not response.content:
            return None
        try:
            return response.json()
        except ValueError:
            # Error pages from proxies and gateways are often HTML. Letting
            # the decode error escape would hide the HTTP status from the
            # caller, so fall through to the status-based exception instead.
            if response.is_success:
                raise
            return None

    def _raise_for_response(
        self,
        response: httpx.Response,
        *,
        payload: Any | None = None,
    ) -> None:
        """Raise the matching exception when the response signals an error.

        Args:
            response: The HTTP response to inspect.
            payload: The already-decoded body, if the caller has it.

        Raises:
            RateLimitError: On HTTP 429.
            AuthenticationError: On HTTP 401 or 403.
            NotFoundError: On HTTP 404.
            APIError: On any other non-success status, or on a successful
                status whose payload reports an in-band error.
        """
        if response.is_success:
            if isinstance(payload, dict):
                code = payload.get("code")
                # A 2xx body can still report a failure through a non-zero
                # "code" that is not accompanied by success=True.
                if code not in (None, 0) and payload.get("success") is not True:
                    self._raise_api_payload(response, payload)
            return

        data = payload if payload is not None else self._parse_json(response)
        status = response.status_code
        error_cls: type[APIError]
        if status == 429:
            error_cls = RateLimitError
        elif status in (401, 403):
            error_cls = AuthenticationError
        elif status == 404:
            error_cls = NotFoundError
        else:
            error_cls = APIError
        raise error_cls(
            _error_message(data, response),
            status_code=status,
            code=_error_code(data),
            name=_error_name(data),
            body=data,
        )

    def _raise_api_payload(self, response: httpx.Response, payload: dict[str, Any]) -> None:
        """Raise for an error reported inside the payload itself.

        Raises:
            RateLimitError: If the HTTP status or the payload code is 429.
            NotFoundError: If the payload name contains ``NOT_FOUND``.
            APIError: Otherwise, carrying the HTTP status of the response.
        """
        message = _error_message(payload, response)
        code = _error_code(payload)
        name = _error_name(payload)
        if response.status_code == 429 or code == 429:
            raise RateLimitError(message, status_code=429, code=code, name=name, body=payload)
        if name and "NOT_FOUND" in name.upper():
            raise NotFoundError(message, status_code=404, code=code, name=name, body=payload)
        raise APIError(
            message,
            status_code=response.status_code,
            code=code,
            name=name,
            body=payload,
        )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _error_message(data: Any, response: httpx.Response) -> str:
|
|
112
|
+
if isinstance(data, dict):
|
|
113
|
+
for key in ("errorMsg", "message", "error"):
|
|
114
|
+
value = data.get(key)
|
|
115
|
+
if isinstance(value, str) and value:
|
|
116
|
+
return value
|
|
117
|
+
if isinstance(value, dict):
|
|
118
|
+
return str(value)
|
|
119
|
+
if data.get("success") is False and isinstance(data.get("error"), dict):
|
|
120
|
+
return str(data["error"])
|
|
121
|
+
return f"HTTP {response.status_code} error for {response.request.url}"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _error_code(data: Any) -> int | str | None:
|
|
125
|
+
if isinstance(data, dict) and "code" in data:
|
|
126
|
+
return cast(int | str | None, data.get("code"))
|
|
127
|
+
return None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _error_name(data: Any) -> str | None:
|
|
131
|
+
if isinstance(data, dict) and isinstance(data.get("name"), str):
|
|
132
|
+
return cast(str, data["name"])
|
|
133
|
+
return None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Alias of httpx.Client.
SyncHTTPClient = httpx.Client
# Alias of httpx.AsyncClient.
AsyncHTTPClient = httpx.AsyncClient
# Literal type listing the HTTP method names GET, POST, PUT, PATCH and DELETE.
HTTPMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
# Name of the environment variable read by default_api_key().
ENV_API_KEY = "DATA_GOV_SG_API_KEY"
# Name of the HTTP header that carries the API key.
HEADER_API_KEY = "x-api-key"

# Default value of the clients' ``timeout`` argument.
DEFAULT_TIMEOUT = 30.0

# data.gov.sg hosts
CATALOG_BASE_URL = "https://api-production.data.gov.sg/v2/public/api"
CKAN_BASE_URL = "https://data.gov.sg"
REALTIME_BASE_URL = "https://api-open.data.gov.sg/v2/real-time/api"


def default_api_key() -> str | None:
    """Return the API key from the environment, or ``None`` when unset."""
    return os.getenv(ENV_API_KEY)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PublicSGDataError(Exception):
    """Root of every exception raised by publicsgdata."""


class APIError(PublicSGDataError):
    """Error reported by the API.

    Attributes are set from the keyword arguments of the same name:
    ``status_code``, ``code``, ``name`` and ``body``. Each defaults to
    ``None``.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int | None = None,
        code: int | str | None = None,
        name: str | None = None,
        body: Any | None = None,
    ) -> None:
        # The message becomes the exception's only positional argument.
        super().__init__(message)
        self.status_code, self.code = status_code, code
        self.name, self.body = name, body


class RateLimitError(APIError):
    """The request was rejected because of rate limiting (HTTP 429)."""


class AuthenticationError(APIError):
    """The request failed authentication (HTTP 401/403)."""


class NotFoundError(APIError):
    """The requested resource does not exist (HTTP 404)."""
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncIterator, Iterator
|
|
4
|
+
from typing import Any, Generic, TypeVar
|
|
5
|
+
|
|
6
|
+
from publicsgdata.datagovsg.models.common import DatasetRow
|
|
7
|
+
|
|
8
|
+
T = TypeVar("T")


class SyncPageIterator(Generic[T]):
    """Synchronous iterator that walks a paginated API one page at a time.

    Args:
        fetch_page: Callable taking the current cursor (``None`` for the
            first call) and returning one page.
        get_items: Callable extracting the items of a page.
        get_next_cursor: Callable extracting the cursor of the following
            page, or ``None`` when there is none.
    """

    def __init__(
        self,
        fetch_page: Any,
        *,
        get_items: Any,
        get_next_cursor: Any,
    ) -> None:
        self._fetch_page = fetch_page
        self._get_items = get_items
        self._get_next_cursor = get_next_cursor
        self._cursor: str | None = None
        self._buffer: list[T] = []
        self._index = 0
        self._exhausted = False

    def __iter__(self) -> Iterator[T]:
        return self

    def __next__(self) -> T:
        # Refill only once every buffered item has been handed out.
        if self._index >= len(self._buffer):
            self._load_next_page()
        current = self._buffer[self._index]
        self._index += 1
        return current

    def _load_next_page(self) -> None:
        """Fetch the next page into the buffer or end the iteration."""
        if self._exhausted:
            raise StopIteration
        page = self._fetch_page(self._cursor)
        self._buffer = list(self._get_items(page))
        self._index = 0
        self._cursor = self._get_next_cursor(page)
        if not self._buffer:
            # An empty page ends the iteration even if a cursor came back.
            self._exhausted = True
            raise StopIteration
        # Without a follow-up cursor this page is the last one.
        self._exhausted = self._cursor is None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AsyncPageIterator(Generic[T]):
    """Asynchronous iterator that walks a paginated API one page at a time.

    Args:
        fetch_page: Callable taking the current cursor (``None`` for the
            first call) and returning an awaitable that yields one page.
        get_items: Callable extracting the items of a page.
        get_next_cursor: Callable extracting the cursor of the following
            page, or ``None`` when there is none.
    """

    def __init__(
        self,
        fetch_page: Any,
        *,
        get_items: Any,
        get_next_cursor: Any,
    ) -> None:
        self._fetch_page = fetch_page
        self._get_items = get_items
        self._get_next_cursor = get_next_cursor
        self._cursor: str | None = None
        self._buffer: list[T] = []
        self._index = 0
        self._exhausted = False

    def __aiter__(self) -> AsyncIterator[T]:
        return self

    async def __anext__(self) -> T:
        # Refill only once every buffered item has been handed out.
        if self._index >= len(self._buffer):
            await self._load_next_page()
        current = self._buffer[self._index]
        self._index += 1
        return current

    async def _load_next_page(self) -> None:
        """Fetch the next page into the buffer or end the iteration."""
        if self._exhausted:
            raise StopAsyncIteration
        page = await self._fetch_page(self._cursor)
        self._buffer = list(self._get_items(page))
        self._index = 0
        self._cursor = self._get_next_cursor(page)
        if not self._buffer:
            # An empty page ends the iteration even if a cursor came back.
            self._exhausted = True
            raise StopAsyncIteration
        # Without a follow-up cursor this page is the last one.
        self._exhausted = self._cursor is None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def parse_cursor_from_next_link(next_link: str | None) -> str | None:
    """Extract the cursor query string from a v2 list-rows next link.

    Args:
        next_link: A full URL, a relative URL, or a bare query string.

    Returns:
        The text after the first ``?`` with any ``#fragment`` removed. A
        value without ``?`` is treated as a bare query string and returned
        unchanged. ``None`` when the input or the extracted query is empty.
    """
    if not next_link:
        return None
    _, has_query, query = next_link.partition("?")
    if not has_query:
        # Already a bare query string (or opaque cursor): nothing to strip.
        return next_link
    query = query.partition("#")[0]
    return query or None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def offset_from_ckan_link(next_link: str | None, *, base_path: str) -> int | None:
    """Read the ``offset`` query parameter from a CKAN ``_links.next`` URL.

    Args:
        next_link: The link to inspect; may be relative.
        base_path: Prefix put in front of ``next_link`` when the link
            contains no ``://``.

    Returns:
        The first ``offset`` value as an int, or ``None`` when the link is
        empty or has no ``offset`` parameter.

    Raises:
        ValueError: If the ``offset`` value is not an integer.
    """
    from urllib.parse import parse_qs, urlparse

    if not next_link:
        return None
    target = next_link if "://" in next_link else f"{base_path}{next_link}"
    offsets = parse_qs(urlparse(target).query).get("offset")
    return int(offsets[0]) if offsets else None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class CkanSearchIterator(SyncPageIterator[DatasetRow]):
    """Synchronous iterator over CKAN ``datastore_search`` results.

    Pagination is offset based: the cursor kept by the base iterator is the
    next offset rendered as a string.

    Args:
        fetch_page: Callable taking an integer offset and returning a page
            that exposes ``records``, ``offset`` and ``links.next``.
        initial_offset: Offset used for the first request.
    """

    def __init__(
        self,
        fetch_page: Any,
        *,
        initial_offset: int = 0,
    ) -> None:
        def fetch_at(cursor: str | None) -> Any:
            offset = initial_offset if cursor is None else int(cursor)
            return fetch_page(offset)

        def following_offset(response: Any) -> str | None:
            # Stop when the server gives no next link or the page is empty.
            if response.links.next is None or len(response.records) == 0:
                return None
            return str(response.offset + len(response.records))

        super().__init__(
            fetch_at,
            get_items=lambda response: response.records,
            get_next_cursor=following_offset,
        )
        if initial_offset:
            self._cursor = str(initial_offset)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class AsyncCkanSearchIterator(AsyncPageIterator[DatasetRow]):
    """Asynchronous iterator over CKAN ``datastore_search`` results.

    Pagination is offset based: the cursor kept by the base iterator is the
    next offset rendered as a string.

    Args:
        fetch_page: Callable taking an integer offset and returning an
            awaitable page that exposes ``records``, ``offset`` and
            ``links.next``.
        initial_offset: Offset used for the first request.
    """

    def __init__(
        self,
        fetch_page: Any,
        *,
        initial_offset: int = 0,
    ) -> None:
        def fetch_at(cursor: str | None) -> Any:
            # Returns whatever fetch_page returns; the base class awaits it.
            offset = initial_offset if cursor is None else int(cursor)
            return fetch_page(offset)

        def following_offset(response: Any) -> str | None:
            # Stop when the server gives no next link or the page is empty.
            if response.links.next is None or len(response.records) == 0:
                return None
            return str(response.offset + len(response.records))

        super().__init__(
            fetch_at,
            get_items=lambda response: response.records,
            get_next_cursor=following_offset,
        )
        if initial_offset:
            self._cursor = str(initial_offset)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.parse import urljoin
|
|
7
|
+
|
|
8
|
+
from publicsgdata._base_client import BaseHTTPClient
|
|
9
|
+
from publicsgdata._constants import CATALOG_BASE_URL, CKAN_BASE_URL, REALTIME_BASE_URL
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataGovSGHost(str, Enum):
    """Identifies which data.gov.sg host a request is sent to.

    Members are also ``str`` instances, so they compare equal to their
    values.
    """

    CATALOG = "catalog"
    CKAN = "ckan"
    REALTIME = "realtime"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DataGovSGRequestMixin(BaseHTTPClient):
    """HTTP request helpers routed to data.gov.sg hosts."""

    def _base_url(self, host: DataGovSGHost) -> str:
        """Return the base URL for ``host``; any other value maps to realtime."""
        if host is DataGovSGHost.CATALOG:
            return CATALOG_BASE_URL
        if host is DataGovSGHost.CKAN:
            return CKAN_BASE_URL
        return REALTIME_BASE_URL

    def _build_url(self, host: DataGovSGHost, path: str) -> str:
        """Join ``path`` onto the base URL of ``host``.

        Leading slashes on ``path`` are ignored, so the base URL's own path
        prefix is always kept.
        """
        base = self._base_url(host)
        # The trailing slash on the base and the stripped slash on the path
        # make urljoin append to the base path rather than replace it.
        return urljoin(f"{base}/", path.lstrip("/"))

    @staticmethod
    def _unwrap_data(payload: dict[str, Any]) -> dict[str, Any]:
        """Return ``payload["data"]`` when it is a dict, else ``payload``."""
        data = payload.get("data")
        if isinstance(data, dict):
            return data
        return payload

    def _catalog_data(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Unwrap the ``data`` envelope of a catalog response, if present."""
        return self._unwrap_data(payload)

    def _realtime_data(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Unwrap the ``data`` envelope of a realtime response, if present."""
        return self._unwrap_data(payload)

    @staticmethod
    def _cursor_params(cursor: str | None, *, limit: int | None = None) -> dict[str, str]:
        """Translate a pagination cursor into request query parameters.

        Args:
            cursor: A query string such as ``"cursor=abc&limit=10"``, or a
                full or relative link that ends in such a query string.
            limit: Page size, sent as ``limit`` unless the cursor carries
                its own ``limit``, which then takes precedence.

        Returns:
            The query parameters as a ``str`` to ``str`` dict. Cursor parts
            without ``=`` are ignored.
        """
        from urllib.parse import unquote

        params: dict[str, str] = {}
        if limit is not None:
            params["limit"] = str(limit)
        if not cursor:
            return params

        # When a whole link is passed, keep only its query string. Otherwise
        # the URL path would end up inside the first parameter name.
        _, has_query, query = cursor.partition("?")
        query = query.partition("#")[0] if has_query else cursor
        for part in query.split("&"):
            key, has_value, value = part.partition("=")
            if not has_value:
                continue
            # The HTTP client percent-encodes parameter values again, so
            # decode here to avoid double encoding. unquote (not
            # unquote_plus) is used so a literal "+" stays untouched.
            params[unquote(key)] = unquote(value)
        return params

    @staticmethod
    def _encode_query(params: Mapping[str, Any]) -> dict[str, str]:
        """Stringify query parameters, dropping entries whose value is None."""
        return {key: str(value) for key, value in params.items() if value is not None}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from publicsgdata._constants import DEFAULT_TIMEOUT, default_api_key
|
|
9
|
+
from publicsgdata.datagovsg._request import DataGovSGHost, DataGovSGRequestMixin
|
|
10
|
+
from publicsgdata.datagovsg.resources.collections import AsyncCollectionsResource
|
|
11
|
+
from publicsgdata.datagovsg.resources.datasets import AsyncDatasetsResource
|
|
12
|
+
from publicsgdata.datagovsg.resources.realtime import AsyncRealtimeResource
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AsyncDataGovSGClient(DataGovSGRequestMixin):
    """Asynchronous client for the data.gov.sg APIs.

    Args:
        api_key: API key; falls back to ``default_api_key()`` when falsy.
        http_client: Existing ``httpx.AsyncClient`` to use. The client is
            only closed by ``close()`` when it was created here.
        timeout: Timeout given to the ``httpx.AsyncClient`` created here.
        max_retries: Forwarded to the base class.
    """

    _http_client: httpx.AsyncClient

    def __init__(
        self,
        *,
        api_key: str | None = None,
        http_client: httpx.AsyncClient | None = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = 0,
    ) -> None:
        resolved_key = api_key or default_api_key()
        super().__init__(api_key=resolved_key, timeout=timeout, max_retries=max_retries)

        owns_client = http_client is None
        self._http_client = httpx.AsyncClient(timeout=timeout) if owns_client else http_client
        self._owns_client = owns_client

        self.collections = AsyncCollectionsResource(self)
        self.datasets = AsyncDatasetsResource(self)
        self.realtime = AsyncRealtimeResource(self)

    async def __aenter__(self) -> AsyncDataGovSGClient:
        return self

    async def __aexit__(self, *args: object) -> None:
        await self.close()

    async def close(self) -> None:
        """Close the HTTP client if this instance created it."""
        if not self._owns_client:
            return
        await self._http_client.aclose()

    async def _request_json(
        self,
        method: str,
        host: DataGovSGHost,
        path: str,
        *,
        params: Mapping[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Send a request and return the decoded JSON object.

        Returns:
            The decoded payload when it is a dict, otherwise ``{}``.

        Raises:
            APIError: (or a subclass) when the response signals an error.
        """
        response = await self._http_client.request(
            method,
            self._build_url(host, path),
            params=self._encode_query(params) if params else None,
            headers=self._merge_headers(),
        )
        payload = self._parse_json(response)
        # Error detection runs for every payload shape before returning.
        self._raise_for_response(response, payload=payload)
        return payload if isinstance(payload, dict) else {}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from publicsgdata._constants import DEFAULT_TIMEOUT, default_api_key
|
|
9
|
+
from publicsgdata.datagovsg._request import DataGovSGHost, DataGovSGRequestMixin
|
|
10
|
+
from publicsgdata.datagovsg.resources.collections import CollectionsResource
|
|
11
|
+
from publicsgdata.datagovsg.resources.datasets import DatasetsResource
|
|
12
|
+
from publicsgdata.datagovsg.resources.realtime import RealtimeResource
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataGovSGClient(DataGovSGRequestMixin):
    """Synchronous client for the data.gov.sg APIs.

    Args:
        api_key: API key; falls back to ``default_api_key()`` when falsy.
        http_client: Existing ``httpx.Client`` to use. The client is only
            closed by ``close()`` when it was created here.
        timeout: Timeout given to the ``httpx.Client`` created here.
        max_retries: Forwarded to the base class.
    """

    _http_client: httpx.Client

    def __init__(
        self,
        *,
        api_key: str | None = None,
        http_client: httpx.Client | None = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = 0,
    ) -> None:
        resolved_key = api_key or default_api_key()
        super().__init__(api_key=resolved_key, timeout=timeout, max_retries=max_retries)

        owns_client = http_client is None
        self._http_client = httpx.Client(timeout=timeout) if owns_client else http_client
        self._owns_client = owns_client

        self.collections = CollectionsResource(self)
        self.datasets = DatasetsResource(self)
        self.realtime = RealtimeResource(self)

    def __enter__(self) -> DataGovSGClient:
        return self

    def __exit__(self, *args: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the HTTP client if this instance created it."""
        if not self._owns_client:
            return
        self._http_client.close()

    def _request_json(
        self,
        method: str,
        host: DataGovSGHost,
        path: str,
        *,
        params: Mapping[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Send a request and return the decoded JSON object.

        Returns:
            The decoded payload when it is a dict, otherwise ``{}``.

        Raises:
            APIError: (or a subclass) when the response signals an error.
        """
        response = self._http_client.request(
            method,
            self._build_url(host, path),
            params=self._encode_query(params) if params else None,
            headers=self._merge_headers(),
        )
        payload = self._parse_json(response)
        # Error detection runs for every payload shape before returning.
        self._raise_for_response(response, payload=payload)
        return payload if isinstance(payload, dict) else {}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from publicsgdata.datagovsg.models.common import (
|
|
2
|
+
CollectionListResponse,
|
|
3
|
+
CollectionMetadata,
|
|
4
|
+
CollectionMetadataResponse,
|
|
5
|
+
CollectionSummary,
|
|
6
|
+
DatasetListResponse,
|
|
7
|
+
DatasetMetadata,
|
|
8
|
+
DatasetMetadataResponse,
|
|
9
|
+
DatasetRow,
|
|
10
|
+
DatasetRowsResponse,
|
|
11
|
+
DatasetSummary,
|
|
12
|
+
DatastoreSearchResult,
|
|
13
|
+
PM25Response,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"CollectionListResponse",
|
|
18
|
+
"CollectionMetadata",
|
|
19
|
+
"CollectionMetadataResponse",
|
|
20
|
+
"CollectionSummary",
|
|
21
|
+
"DatasetListResponse",
|
|
22
|
+
"DatasetMetadata",
|
|
23
|
+
"DatasetMetadataResponse",
|
|
24
|
+
"DatasetRow",
|
|
25
|
+
"DatasetRowsResponse",
|
|
26
|
+
"DatasetSummary",
|
|
27
|
+
"DatastoreSearchResult",
|
|
28
|
+
"PM25Response",
|
|
29
|
+
]
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ApiModel(BaseModel):
    """Base class for the API response models in this module."""

    # extra="allow" keeps keys that have no declared field instead of
    # rejecting them; populate_by_name lets aliased fields also be set
    # through their Python attribute name.
    model_config = ConfigDict(extra="allow", populate_by_name=True)


class PaginationLinks(ApiModel):
    """Pagination links carried by a DatasetRowsResponse."""

    next: str | None = None
    start: str | None = None


class DatasetRow(ApiModel):
    """One dataset row; no fields are declared, so every key is an extra."""

    # Restates extra="allow" for this model.
    model_config = ConfigDict(extra="allow")


class CollectionSummary(ApiModel):
    """Collection entry as it appears in a CollectionListResponse."""

    # Aliases are the camelCase keys of the API payload.
    collection_id: str = Field(alias="collectionId")
    name: str
    description: str | None = None
    created_at: str | None = Field(default=None, alias="createdAt")
    last_updated_at: str | None = Field(default=None, alias="lastUpdatedAt")
    frequency: str | None = None
    sources: list[str] | None = None
    managed_by_agency_name: str | None = Field(default=None, alias="managedByAgencyName")
    child_datasets: list[str] | None = Field(default=None, alias="childDatasets")


class CollectionMetadata(ApiModel):
    """Metadata of a single collection."""

    # Aliases are the camelCase keys of the API payload.
    collection_id: str = Field(alias="collectionId")
    name: str
    description: str | None = None
    created_at: str | None = Field(default=None, alias="createdAt")
    last_updated_at: str | None = Field(default=None, alias="lastUpdatedAt")
    frequency: str | None = None
    sources: list[str] | None = None
    managed_by: str | None = Field(default=None, alias="managedBy")
    child_datasets: list[str] | None = Field(default=None, alias="childDatasets")


class CollectionListResponse(ApiModel):
    """Payload holding a list of collection summaries."""

    collections: list[CollectionSummary]


class CollectionMetadataResponse(ApiModel):
    """Payload wrapping one CollectionMetadata under ``collectionMetadata``."""

    collection_metadata: CollectionMetadata = Field(alias="collectionMetadata")


class DatasetSummary(ApiModel):
    """Dataset entry as it appears in a DatasetListResponse."""

    # Aliases are the camelCase keys of the API payload.
    dataset_id: str = Field(alias="datasetId")
    name: str
    status: str | None = None
    description: str | None = None
    format: str | None = None
    created_at: str | None = Field(default=None, alias="createdAt")
    last_updated_at: str | None = Field(default=None, alias="lastUpdatedAt")
    managed_by_agency_name: str | None = Field(default=None, alias="managedByAgencyName")
    coverage_start: str | None = Field(default=None, alias="coverageStart")
    coverage_end: str | None = Field(default=None, alias="coverageEnd")


class DatasetListResponse(ApiModel):
    """Payload holding dataset summaries and an optional ``pages`` value."""

    datasets: list[DatasetSummary]
    pages: int | None = None


class DatasetMetadata(ApiModel):
    """Metadata of a single dataset."""

    # Aliases are the camelCase keys of the API payload.
    dataset_id: str = Field(alias="datasetId")
    name: str
    description: str | None = None
    format: str | None = None
    status: str | None = None
    created_at: str | None = Field(default=None, alias="createdAt")
    last_updated_at: str | None = Field(default=None, alias="lastUpdatedAt")
    managed_by: str | None = Field(default=None, alias="managedBy")
    collection_ids: list[str] | None = Field(default=None, alias="collectionIds")
    coverage_start: str | None = Field(default=None, alias="coverageStart")
    coverage_end: str | None = Field(default=None, alias="coverageEnd")
    contact_emails: list[str] | None = Field(default=None, alias="contactEmails")
    dataset_size: int | None = Field(default=None, alias="datasetSize")
    column_metadata: dict[str, Any] | None = Field(default=None, alias="columnMetadata")


class DatasetMetadataResponse(ApiModel):
    """Payload that may wrap a DatasetMetadata under ``datasetMetadata``."""

    dataset_metadata: DatasetMetadata | None = Field(default=None, alias="datasetMetadata")

    @property
    def metadata(self) -> DatasetMetadata:
        """Return the wrapped metadata.

        Raises:
            ValueError: If the response carried no dataset metadata.
        """
        if self.dataset_metadata is not None:
            return self.dataset_metadata
        raise ValueError("dataset metadata missing from response")


class DatasetRowsResponse(ApiModel):
    """One page of dataset rows together with its pagination links."""

    dataset_id: str = Field(alias="datasetId")
    dataset_name: str | None = Field(default=None, alias="datasetName")
    rows: list[DatasetRow]
    limit: int
    links: PaginationLinks | None = None


class DatastoreField(ApiModel):
    """Field descriptor listed in a DatastoreSearchResult."""

    id: str
    type: str | None = None


class DatastoreSearchLinks(ApiModel):
    """Pagination links of a DatastoreSearchResult."""

    start: str | None = None
    next: str | None = None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class DatastoreSearchResult(ApiModel):
    """One page of a CKAN ``datastore_search`` result.

    ``offset`` defaults to 0 and ``links`` to an empty DatastoreSearchLinks
    when the payload omits them.
    """

    resource_id: str
    fields: list[DatastoreField]
    records: list[DatasetRow]
    total: int
    limit: int
    offset: int = 0
    # CKAN returns its pagination links under the key "_links". Accepting
    # that key here means ``links.next`` is filled without the caller
    # renaming it first; the base config's populate_by_name still accepts
    # the plain "links" key.
    # NOTE(review): confirm whether the search resource already renames
    # "_links" before validation; this alias is harmless either way.
    links: DatastoreSearchLinks = Field(
        default_factory=DatastoreSearchLinks,
        validation_alias="_links",
    )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class PM25Reading(ApiModel):
    """One entry of the ``items`` list of a PM25Response."""

    date: str | None = None
    updated_timestamp: str | None = Field(default=None, alias="updatedTimestamp")
    timestamp: str | None = None
    # Left untyped beyond "dict"; the inner structure is not modelled here.
    readings: dict[str, Any] | None = None


class PM25RegionMetadata(ApiModel):
    """One entry of the ``regionMetadata`` list of a PM25Response."""

    name: str
    label_location: dict[str, float] | None = Field(default=None, alias="labelLocation")


class PM25Response(ApiModel):
    """PM2.5 payload: region metadata, readings and a pagination token."""

    # Both lists default to empty when the payload omits them.
    region_metadata: list[PM25RegionMetadata] = Field(default_factory=list, alias="regionMetadata")
    items: list[PM25Reading] = Field(default_factory=list)
    pagination_token: str | None = Field(default=None, alias="paginationToken")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from publicsgdata.datagovsg._request import DataGovSGHost
|
|
6
|
+
from publicsgdata.datagovsg.models import (
|
|
7
|
+
CollectionListResponse,
|
|
8
|
+
CollectionMetadata,
|
|
9
|
+
CollectionMetadataResponse,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from publicsgdata.datagovsg.client import DataGovSGClient
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CollectionsResource:
    """Synchronous access to the catalog's collection endpoints."""

    def __init__(self, client: DataGovSGClient) -> None:
        self._client = client

    def list(self) -> CollectionListResponse:
        """Fetch ``/collections`` and return the validated response."""
        client = self._client
        raw = client._request_json("GET", DataGovSGHost.CATALOG, "/collections")
        return CollectionListResponse.model_validate(client._catalog_data(raw))

    def get_metadata(self, collection_id: str) -> CollectionMetadata:
        """Fetch the metadata of the collection identified by ``collection_id``."""
        client = self._client
        raw = client._request_json(
            "GET",
            DataGovSGHost.CATALOG,
            f"/collections/{collection_id}/metadata",
        )
        envelope = CollectionMetadataResponse.model_validate(client._catalog_data(raw))
        return envelope.collection_metadata
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AsyncCollectionsResource:
    """Asynchronous access to the catalog's collection endpoints."""

    def __init__(self, client: object) -> None:
        # Typed as ``object``; the private helpers used below are therefore
        # reached with ``type: ignore`` markers.
        self._client = client

    async def list(self) -> CollectionListResponse:
        """Fetch ``/collections`` and return the validated response."""
        raw = await self._client._request_json(  # type: ignore[attr-defined]
            "GET", DataGovSGHost.CATALOG, "/collections"
        )
        unwrapped = self._client._catalog_data(raw)  # type: ignore[attr-defined]
        return CollectionListResponse.model_validate(unwrapped)

    async def get_metadata(self, collection_id: str) -> CollectionMetadata:
        """Fetch the metadata of the collection identified by ``collection_id``."""
        raw = await self._client._request_json(  # type: ignore[attr-defined]
            "GET",
            DataGovSGHost.CATALOG,
            f"/collections/{collection_id}/metadata",
        )
        unwrapped = self._client._catalog_data(raw)  # type: ignore[attr-defined]
        envelope = CollectionMetadataResponse.model_validate(unwrapped)
        return envelope.collection_metadata
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import AsyncIterator, Iterator, Mapping
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
from publicsgdata._pagination import AsyncCkanSearchIterator, CkanSearchIterator, SyncPageIterator
|
|
8
|
+
from publicsgdata.datagovsg._request import DataGovSGHost
|
|
9
|
+
from publicsgdata.datagovsg.models import (
|
|
10
|
+
DatasetListResponse,
|
|
11
|
+
DatasetMetadata,
|
|
12
|
+
DatasetRow,
|
|
13
|
+
DatasetRowsResponse,
|
|
14
|
+
DatastoreSearchResult,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from publicsgdata.datagovsg.async_client import AsyncDataGovSGClient
|
|
19
|
+
from publicsgdata.datagovsg.client import DataGovSGClient
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DatasetsResource:
    """Synchronous access to dataset listing, metadata, rows and datastore search.

    All requests are issued through the ``DataGovSGClient`` supplied at
    construction time.
    """

    def __init__(self, client: DataGovSGClient) -> None:
        self._client = client

    @staticmethod
    def _search_query(
        resource_id: str,
        limit: int,
        offset: int,
        filters: Mapping[str, Any] | None,
        q: str | Mapping[str, Any] | None,
        sort: str | None,
        fields: str | None,
    ) -> dict[str, Any]:
        """Build the query parameters for a ``datastore_search`` request."""
        query: dict[str, Any] = {
            "resource_id": resource_id,
            "limit": limit,
            "offset": offset,
        }
        # Optional parameters, in the order they are added to the query.
        # ``filters`` is always JSON-encoded; ``q`` only when it is a mapping.
        optional = {
            "filters": None if filters is None else json.dumps(filters),
            "q": json.dumps(q) if isinstance(q, Mapping) else q,
            "sort": sort,
            "fields": fields,
        }
        query.update({key: value for key, value in optional.items() if value is not None})
        return query

    @staticmethod
    def _search_result(
        payload: Any,
        resource_id: str,
        limit: int,
        offset: int,
    ) -> DatastoreSearchResult:
        """Turn a ``datastore_search`` payload into a ``DatastoreSearchResult``."""
        # Use the "result" entry when present, otherwise the payload itself.
        body = payload.get("result", payload)
        found = body.get("records", [])
        return DatastoreSearchResult(
            resource_id=body.get("resource_id", resource_id),
            fields=body.get("fields", []),
            records=found,
            total=body.get("total", len(found)),
            limit=body.get("limit", limit),
            # The requested offset is reported back, not a value from the payload.
            offset=offset,
            links=body.get("_links", {}),
        )

    def list(self, *, page: int | None = None) -> DatasetListResponse:
        """Request ``/datasets`` from the catalog host.

        Args:
            page: Sent as the ``page`` query parameter when not ``None``;
                otherwise no query parameters are sent.
        """
        query = None if page is None else {"page": page}
        raw = self._client._request_json(
            "GET", DataGovSGHost.CATALOG, "/datasets", params=query
        )
        data = self._client._catalog_data(raw)
        return DatasetListResponse.model_validate(data)

    def get_metadata(self, dataset_id: str) -> DatasetMetadata:
        """Request ``/datasets/{dataset_id}/metadata`` and validate it as ``DatasetMetadata``."""
        raw = self._client._request_json(
            "GET", DataGovSGHost.CATALOG, f"/datasets/{dataset_id}/metadata"
        )
        data = self._client._catalog_data(raw)
        return DatasetMetadata.model_validate(data)

    def list_rows(
        self,
        dataset_id: str,
        *,
        limit: int = 100,
        cursor: str | None = None,
    ) -> DatasetRowsResponse:
        """Request one page of rows from ``/datasets/{dataset_id}/list-rows``.

        Args:
            dataset_id: Identifier interpolated into the request path.
            limit: Passed to the client's ``_cursor_params`` helper.
            cursor: Passed to the client's ``_cursor_params`` helper.
        """
        endpoint = f"/datasets/{dataset_id}/list-rows"
        query = self._client._cursor_params(cursor, limit=limit)
        raw = self._client._request_json(
            "GET", DataGovSGHost.CATALOG, endpoint, params=query
        )
        data = self._client._catalog_data(raw)
        return DatasetRowsResponse.model_validate(data)

    def iter_rows(
        self,
        dataset_id: str,
        *,
        limit: int = 100,
        cursor: str | None = None,
    ) -> Iterator[DatasetRow]:
        """Return a ``SyncPageIterator`` over a dataset's rows.

        Pages are fetched with ``list_rows``. The items of a page are
        ``response.rows`` and the next cursor is ``response.links.next``
        (``None`` when the response has no links).

        Args:
            dataset_id: Dataset whose rows are iterated.
            limit: Forwarded to every ``list_rows`` call.
            cursor: Starting cursor assigned to the iterator.
        """

        def fetch(next_cursor):
            return self.list_rows(dataset_id, limit=limit, cursor=next_cursor)

        def rows_of(response):
            return response.rows

        def next_cursor_of(response):
            links = response.links
            return None if links is None else links.next

        pager: SyncPageIterator[DatasetRow] = SyncPageIterator(
            fetch,
            get_items=rows_of,
            get_next_cursor=next_cursor_of,
        )
        # The starting cursor is set directly on the iterator's private attribute.
        pager._cursor = cursor
        return pager

    def search(
        self,
        resource_id: str,
        *,
        limit: int = 100,
        offset: int = 0,
        filters: Mapping[str, Any] | None = None,
        q: str | Mapping[str, Any] | None = None,
        sort: str | None = None,
        fields: str | None = None,
    ) -> DatastoreSearchResult:
        """Call ``/api/action/datastore_search`` on the CKAN host.

        Args:
            resource_id: Sent as ``resource_id``; also the fallback for the
                result's ``resource_id`` when the payload lacks one.
            limit: Sent as ``limit``; fallback for the result's ``limit``.
            offset: Sent as ``offset`` and echoed in the result.
            filters: JSON-encoded into ``filters`` when not ``None``.
            q: JSON-encoded when it is a mapping, otherwise sent unchanged;
                omitted when ``None``.
            sort: Sent as ``sort`` when not ``None``.
            fields: Sent as ``fields`` when not ``None``.

        Returns:
            A ``DatastoreSearchResult`` built from the payload, with defaults
            for any keys the payload does not contain.
        """
        query = self._search_query(resource_id, limit, offset, filters, q, sort, fields)
        raw = self._client._request_json(
            "GET",
            DataGovSGHost.CKAN,
            "/api/action/datastore_search",
            params=query,
        )
        return self._search_result(raw, resource_id, limit, offset)

    def iter_search(
        self,
        resource_id: str,
        *,
        limit: int = 100,
        offset: int = 0,
        filters: Mapping[str, Any] | None = None,
        q: str | Mapping[str, Any] | None = None,
        sort: str | None = None,
    ) -> Iterator[DatasetRow]:
        """Return a ``CkanSearchIterator`` whose pages are fetched with ``search``.

        Every page request reuses ``limit``, ``filters``, ``q`` and ``sort``;
        only the offset passed in by the iterator varies. ``offset`` is given
        to the iterator as its ``initial_offset``.
        """
        return CkanSearchIterator(
            lambda page_offset: self.search(
                resource_id,
                limit=limit,
                offset=page_offset,
                filters=filters,
                q=q,
                sort=sort,
            ),
            initial_offset=offset,
        )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class AsyncDatasetsResource:
    """Async access to dataset listing, metadata, rows and datastore search.

    All requests are awaited on the ``AsyncDataGovSGClient`` supplied at
    construction time.
    """

    def __init__(self, client: AsyncDataGovSGClient) -> None:
        self._client = client

    @staticmethod
    def _search_query(
        resource_id: str,
        limit: int,
        offset: int,
        filters: Mapping[str, Any] | None,
        q: str | Mapping[str, Any] | None,
        sort: str | None,
        fields: str | None,
    ) -> dict[str, Any]:
        """Build the query parameters for a ``datastore_search`` request."""
        query: dict[str, Any] = {
            "resource_id": resource_id,
            "limit": limit,
            "offset": offset,
        }
        # Optional parameters, in the order they are added to the query.
        # ``filters`` is always JSON-encoded; ``q`` only when it is a mapping.
        optional = {
            "filters": None if filters is None else json.dumps(filters),
            "q": json.dumps(q) if isinstance(q, Mapping) else q,
            "sort": sort,
            "fields": fields,
        }
        query.update({key: value for key, value in optional.items() if value is not None})
        return query

    @staticmethod
    def _search_result(
        payload: Any,
        resource_id: str,
        limit: int,
        offset: int,
    ) -> DatastoreSearchResult:
        """Turn a ``datastore_search`` payload into a ``DatastoreSearchResult``."""
        # Use the "result" entry when present, otherwise the payload itself.
        body = payload.get("result", payload)
        found = body.get("records", [])
        return DatastoreSearchResult(
            resource_id=body.get("resource_id", resource_id),
            fields=body.get("fields", []),
            records=found,
            total=body.get("total", len(found)),
            limit=body.get("limit", limit),
            # The requested offset is reported back, not a value from the payload.
            offset=offset,
            links=body.get("_links", {}),
        )

    async def list(self, *, page: int | None = None) -> DatasetListResponse:
        """Request ``/datasets`` from the catalog host.

        Args:
            page: Sent as the ``page`` query parameter when not ``None``;
                otherwise no query parameters are sent.
        """
        query = None if page is None else {"page": page}
        raw = await self._client._request_json(
            "GET", DataGovSGHost.CATALOG, "/datasets", params=query
        )
        data = self._client._catalog_data(raw)
        return DatasetListResponse.model_validate(data)

    async def get_metadata(self, dataset_id: str) -> DatasetMetadata:
        """Request ``/datasets/{dataset_id}/metadata`` and validate it as ``DatasetMetadata``."""
        raw = await self._client._request_json(
            "GET", DataGovSGHost.CATALOG, f"/datasets/{dataset_id}/metadata"
        )
        data = self._client._catalog_data(raw)
        return DatasetMetadata.model_validate(data)

    async def list_rows(
        self,
        dataset_id: str,
        *,
        limit: int = 100,
        cursor: str | None = None,
    ) -> DatasetRowsResponse:
        """Request one page of rows from ``/datasets/{dataset_id}/list-rows``.

        Args:
            dataset_id: Identifier interpolated into the request path.
            limit: Passed to the client's ``_cursor_params`` helper.
            cursor: Passed to the client's ``_cursor_params`` helper.
        """
        endpoint = f"/datasets/{dataset_id}/list-rows"
        query = self._client._cursor_params(cursor, limit=limit)
        raw = await self._client._request_json(
            "GET", DataGovSGHost.CATALOG, endpoint, params=query
        )
        data = self._client._catalog_data(raw)
        return DatasetRowsResponse.model_validate(data)

    async def iter_rows(
        self,
        dataset_id: str,
        *,
        limit: int = 100,
        cursor: str | None = None,
    ) -> AsyncIterator[DatasetRow]:
        """Yield a dataset's rows by driving an ``AsyncPageIterator``.

        Pages are fetched with ``list_rows``. The items of a page are
        ``response.rows`` and the next cursor is ``response.links.next``
        (``None`` when the response has no links).

        Args:
            dataset_id: Dataset whose rows are iterated.
            limit: Forwarded to every ``list_rows`` call.
            cursor: Starting cursor assigned to the iterator.
        """
        from publicsgdata._pagination import AsyncPageIterator

        def fetch(next_cursor):
            # Deliberately not awaited here: the coroutine object is returned
            # to the iterator, as the original callable did.
            return self.list_rows(dataset_id, limit=limit, cursor=next_cursor)

        def rows_of(response):
            return response.rows

        def next_cursor_of(response):
            links = response.links
            return None if links is None else links.next

        pager: AsyncPageIterator[DatasetRow] = AsyncPageIterator(
            fetch,
            get_items=rows_of,
            get_next_cursor=next_cursor_of,
        )
        # The starting cursor is set directly on the iterator's private attribute.
        pager._cursor = cursor
        async for item in pager:
            yield item

    async def search(
        self,
        resource_id: str,
        *,
        limit: int = 100,
        offset: int = 0,
        filters: Mapping[str, Any] | None = None,
        q: str | Mapping[str, Any] | None = None,
        sort: str | None = None,
        fields: str | None = None,
    ) -> DatastoreSearchResult:
        """Call ``/api/action/datastore_search`` on the CKAN host.

        Args:
            resource_id: Sent as ``resource_id``; also the fallback for the
                result's ``resource_id`` when the payload lacks one.
            limit: Sent as ``limit``; fallback for the result's ``limit``.
            offset: Sent as ``offset`` and echoed in the result.
            filters: JSON-encoded into ``filters`` when not ``None``.
            q: JSON-encoded when it is a mapping, otherwise sent unchanged;
                omitted when ``None``.
            sort: Sent as ``sort`` when not ``None``.
            fields: Sent as ``fields`` when not ``None``.

        Returns:
            A ``DatastoreSearchResult`` built from the payload, with defaults
            for any keys the payload does not contain.
        """
        query = self._search_query(resource_id, limit, offset, filters, q, sort, fields)
        raw = await self._client._request_json(
            "GET",
            DataGovSGHost.CKAN,
            "/api/action/datastore_search",
            params=query,
        )
        return self._search_result(raw, resource_id, limit, offset)

    async def iter_search(
        self,
        resource_id: str,
        *,
        limit: int = 100,
        offset: int = 0,
        filters: Mapping[str, Any] | None = None,
        q: str | Mapping[str, Any] | None = None,
        sort: str | None = None,
    ) -> AsyncIterator[DatasetRow]:
        """Yield search records by driving an ``AsyncCkanSearchIterator``.

        Every page request reuses ``limit``, ``filters``, ``q`` and ``sort``;
        only the offset passed in by the iterator varies. ``offset`` is given
        to the iterator as its ``initial_offset``.
        """

        def fetch_page(page_offset):
            # Returns the un-awaited ``search`` coroutine to the iterator.
            return self.search(
                resource_id,
                limit=limit,
                offset=page_offset,
                filters=filters,
                q=q,
                sort=sort,
            )

        pager = AsyncCkanSearchIterator(fetch_page, initial_offset=offset)
        async for item in pager:
            yield item
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from publicsgdata.datagovsg.resources.realtime.pm25 import AsyncPM25Resource, PM25Resource
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class RealtimeResource:
    """Container for the synchronous real-time resources; exposes ``pm25``."""

    def __init__(self, client: object) -> None:
        # ``client`` is typed as ``object`` here, hence the ignore marker when
        # it is handed to ``PM25Resource``.
        pm25_resource = PM25Resource(client)  # type: ignore[arg-type]
        self.pm25 = pm25_resource
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AsyncRealtimeResource:
    """Container for the async real-time resources; exposes ``pm25``."""

    def __init__(self, client: object) -> None:
        # ``client`` is typed as ``object`` here, hence the ignore marker when
        # it is handed to ``AsyncPM25Resource``.
        pm25_resource = AsyncPM25Resource(client)  # type: ignore[arg-type]
        self.pm25 = pm25_resource
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from publicsgdata.datagovsg._request import DataGovSGHost
|
|
6
|
+
from publicsgdata.datagovsg.models import PM25Response
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from publicsgdata.datagovsg.async_client import AsyncDataGovSGClient
|
|
10
|
+
from publicsgdata.datagovsg.client import DataGovSGClient
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PM25Resource:
    """Synchronous access to the ``/pm25`` endpoint on the real-time host."""

    def __init__(self, client: DataGovSGClient) -> None:
        self._client = client

    def get(
        self,
        *,
        date: str | None = None,
        pagination_token: str | None = None,
    ) -> PM25Response:
        """Request ``/pm25`` and validate the result as a ``PM25Response``.

        Args:
            date: Sent as the ``date`` query parameter when not ``None``.
            pagination_token: Sent as the ``paginationToken`` query parameter
                when not ``None``.

        Returns:
            The payload, passed through the client's ``_realtime_data`` and
            validated as a ``PM25Response``.
        """
        candidates = {"date": date, "paginationToken": pagination_token}
        query: dict[str, str] = {
            key: value for key, value in candidates.items() if value is not None
        }
        raw = self._client._request_json(
            "GET",
            DataGovSGHost.REALTIME,
            "/pm25",
            # An empty parameter dict is sent as ``None`` instead.
            params=query if query else None,
        )
        data = self._client._realtime_data(raw)
        return PM25Response.model_validate(data)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AsyncPM25Resource:
    """Async access to the ``/pm25`` endpoint on the real-time host."""

    def __init__(self, client: AsyncDataGovSGClient) -> None:
        self._client = client

    async def get(
        self,
        *,
        date: str | None = None,
        pagination_token: str | None = None,
    ) -> PM25Response:
        """Request ``/pm25`` and validate the result as a ``PM25Response``.

        Args:
            date: Sent as the ``date`` query parameter when not ``None``.
            pagination_token: Sent as the ``paginationToken`` query parameter
                when not ``None``.

        Returns:
            The payload, passed through the client's ``_realtime_data`` and
            validated as a ``PM25Response``.
        """
        candidates = {"date": date, "paginationToken": pagination_token}
        query: dict[str, str] = {
            key: value for key, value in candidates.items() if value is not None
        }
        raw = await self._client._request_json(
            "GET",
            DataGovSGHost.REALTIME,
            "/pm25",
            # An empty parameter dict is sent as ``None`` instead.
            params=query if query else None,
        )
        data = self._client._realtime_data(raw)
        return PM25Response.model_validate(data)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: publicsgdata
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client for Singapore government open data (data.gov.sg, LTA, OneMap)
|
|
5
|
+
Project-URL: Homepage, https://github.com/publicsgdata/publicsgdata
|
|
6
|
+
Project-URL: Repository, https://github.com/publicsgdata/publicsgdata
|
|
7
|
+
Project-URL: Documentation, https://github.com/publicsgdata/publicsgdata#readme
|
|
8
|
+
Project-URL: Issues, https://github.com/publicsgdata/publicsgdata/issues
|
|
9
|
+
Author: publicsgdata contributors
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: api,data.gov.sg,open-data,sdk,singapore
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: httpx<1,>=0.27.0
|
|
25
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
26
|
+
Requires-Dist: typing-extensions<5,>=4.8
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# publicsgdata
|
|
37
|
+
|
|
38
|
+
[License: MIT](LICENSE)
|
|
39
|
+
|
|
40
|
+
Python client for Singapore government open data: data.gov.sg today, LTA and OneMap later.
|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
Requires [uv](https://docs.astral.sh/uv/getting-started/installation/).
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
uv pip install publicsgdata
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Quickstart
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from publicsgdata import DataGovSGClient
|
|
54
|
+
|
|
55
|
+
with DataGovSGClient() as client: # optional: api_key="..." or DATA_GOV_SG_API_KEY
|
|
56
|
+
catalog = client.collections.list()
|
|
57
|
+
print(f"{len(catalog.collections)} collections")
|
|
58
|
+
print(catalog.collections[0].name)
|
|
59
|
+
|
|
60
|
+
# HDB resale prices (swap in any dataset ID)
|
|
61
|
+
rows = client.datasets.list_rows("d_8b84c4ee58e3cfc0ece0d773c8ca6abc", limit=10)
|
|
62
|
+
for row in rows.rows:
|
|
63
|
+
print(row.model_dump())
|
|
64
|
+
|
|
65
|
+
pm25 = client.realtime.pm25.get()
|
|
66
|
+
print(pm25.items[0].readings)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Async
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from publicsgdata import AsyncDataGovSGClient
|
|
73
|
+
|
|
74
|
+
async with AsyncDataGovSGClient() as client:
|
|
75
|
+
rows = await client.datasets.list_rows("d_8b84c4ee58e3cfc0ece0d773c8ca6abc", limit=5)
|
|
76
|
+
print(len(rows.rows))
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Custom HTTP client
|
|
80
|
+
|
|
81
|
+
Pass your own `httpx` client if you need custom timeouts, proxies, etc.
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import httpx
|
|
85
|
+
from publicsgdata import DataGovSGClient
|
|
86
|
+
|
|
87
|
+
with httpx.Client(timeout=30.0) as http:
|
|
88
|
+
client = DataGovSGClient(http_client=http)
|
|
89
|
+
print(len(client.collections.list().collections))
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Authentication
|
|
93
|
+
|
|
94
|
+
You can call the API without a key while experimenting. For regular use, get a key from [data.gov.sg](https://data.gov.sg/) and set:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
export DATA_GOV_SG_API_KEY="your-key"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Environment variables
|
|
101
|
+
|
|
102
|
+
| Variable | Required | Description |
|
|
103
|
+
|---|---|---|
|
|
104
|
+
| `DATA_GOV_SG_API_KEY` | No | data.gov.sg API key (`x-api-key` header) |
|
|
105
|
+
|
|
106
|
+
## Development
|
|
107
|
+
|
|
108
|
+
You'll need [uv](https://docs.astral.sh/uv/getting-started/installation/).
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
./scripts/dev_setup.sh # creates .venv from uv.lock
|
|
112
|
+
./scripts/format.sh
|
|
113
|
+
./scripts/validate.sh
|
|
114
|
+
./scripts/test.sh # unit tests, runs in CI
|
|
115
|
+
./scripts/test_integration.sh # hits the real API, local only
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Or run things directly: `uv run pytest`, `uv run ruff check .`, etc.
|
|
119
|
+
|
|
120
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) if you're opening a PR.
|
|
121
|
+
|
|
122
|
+
## Roadmap
|
|
123
|
+
|
|
124
|
+
- **v0.1.0**: `DataGovSGClient`
|
|
125
|
+
- **v0.2.0**: `LTAClient` (LTA DataMall)
|
|
126
|
+
- **v0.3.0**: `OneMapClient`
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
publicsgdata/__init__.py,sha256=kHOfMJK12wYtUePZFhP9_92TsI-LRLPfQ63wGxTonBE,542
|
|
2
|
+
publicsgdata/_base_client.py,sha256=X1Yt2To2c6BsYJ6IsuCtnyF_E2tak7G5pp_7aY5Gqbg,4461
|
|
3
|
+
publicsgdata/_constants.py,sha256=wO3st5DA3qEUA-F-ZnUVRS_SkH9mbRKuQWmHPMj10tU,411
|
|
4
|
+
publicsgdata/_exceptions.py,sha256=9hijaCArdQf9_kQif0qAVzDatIW-1GemSZzZw8ch4c8,890
|
|
5
|
+
publicsgdata/_pagination.py,sha256=TStPrqGU6vt3OnAc1HZgTdLSQoxaWF8HyvZp9dy6IiY,4668
|
|
6
|
+
publicsgdata/datagovsg/__init__.py,sha256=DUckqefYi2A1yMVT6MpwoQgS3yYwoyZpDkjs7cScXXY,213
|
|
7
|
+
publicsgdata/datagovsg/_request.py,sha256=ZXgf0jdPOUZZuIdkWfxAb6vgNsuKWV3p3eXqUtoqS0Q,2047
|
|
8
|
+
publicsgdata/datagovsg/async_client.py,sha256=GhHJvDwLC01MNMnTWfPwTZ-Cerej2K8Fz1YCo0ExORg,2377
|
|
9
|
+
publicsgdata/datagovsg/client.py,sha256=r9H7Kmw4WQyQVumTpQe5GF300MsuS7LR5jF7NwOx_9w,2276
|
|
10
|
+
publicsgdata/datagovsg/models/__init__.py,sha256=sAyhoD5UzMfFElfUB4Ug8ZCbxsKJ1kM2AIbyXEtul2U,668
|
|
11
|
+
publicsgdata/datagovsg/models/common.py,sha256=H1IU2AJLOveVHDq5HpgOPRCjplQdEcRRD9u9i_JTLRk,4808
|
|
12
|
+
publicsgdata/datagovsg/resources/collections.py,sha256=WQYYwP3xaIJEh9VD1hGGxCWovR_PPVV0rIVo8SRXHck,1867
|
|
13
|
+
publicsgdata/datagovsg/resources/datasets.py,sha256=uIRdAsf4lX5KMI7rpqwJeTPKx-06EkuQ-0FHg5Gs0Ag,8505
|
|
14
|
+
publicsgdata/datagovsg/resources/realtime/__init__.py,sha256=ih0Qlw4FBLQ_DlFEPcziWKbLrRZYr6b8rW0PhX5htx4,383
|
|
15
|
+
publicsgdata/datagovsg/resources/realtime/pm25.py,sha256=MORU5kFFMBowYhPp9SHzj2TL4iPdqP_0AMlpAr0hOjo,1747
|
|
16
|
+
publicsgdata-0.1.0.dist-info/METADATA,sha256=KIewjVREndffBI3lx8YAd8bnjwmpFWnoEFrtP03ZdCk,3894
|
|
17
|
+
publicsgdata-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
18
|
+
publicsgdata-0.1.0.dist-info/licenses/LICENSE,sha256=dGDq1NgMiTjZ7uCBQxhaCI22b_iq-mx12z2RdZ0mHvc,1062
|
|
19
|
+
publicsgdata-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Harry
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|