fpu-barometer 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fpu_barometer-0.3.0/.gitignore +9 -0
- fpu_barometer-0.3.0/LICENSE.md +7 -0
- fpu_barometer-0.3.0/PKG-INFO +17 -0
- fpu_barometer-0.3.0/README.md +3 -0
- fpu_barometer-0.3.0/pyproject.toml +33 -0
- fpu_barometer-0.3.0/src/fpu_barometer/__init__.py +70 -0
- fpu_barometer-0.3.0/src/fpu_barometer/client.py +151 -0
- fpu_barometer-0.3.0/src/fpu_barometer/config.py +98 -0
- fpu_barometer-0.3.0/src/fpu_barometer/dataframe.py +77 -0
- fpu_barometer-0.3.0/src/fpu_barometer/models.py +501 -0
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2026 Free Press Unlimited
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fpu-barometer
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Free Press Unlimited researcher-facing API client
|
|
5
|
+
Project-URL: Homepage, https://www.freepressunlimited.org
|
|
6
|
+
Author: Phillip Kersten, Jannes Kelso, Jos Bartman
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE.md
|
|
9
|
+
Keywords: barometer,data-science,fpu,journalists,press
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: pandas>=2.0.0
|
|
12
|
+
Requires-Dist: requests>=2.28.0
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# fpu-barometer
|
|
16
|
+
|
|
17
|
+
Lightweight researcher-facing API client for Barometer.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fpu-barometer"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Free Press Unlimited researcher-facing API client"
|
|
9
|
+
authors = [
|
|
10
|
+
{name = "Phillip Kersten"},
|
|
11
|
+
{name = "Jannes Kelso"},
|
|
12
|
+
{name = "Jos Bartman"},
|
|
13
|
+
]
|
|
14
|
+
readme = "README.md"
|
|
15
|
+
requires-python = ">=3.10"
|
|
16
|
+
license = "MIT"
|
|
17
|
+
dependencies = [
|
|
18
|
+
"pandas>=2.0.0",
|
|
19
|
+
"requests>=2.28.0",
|
|
20
|
+
]
|
|
21
|
+
keywords = [
|
|
22
|
+
"press",
|
|
23
|
+
"journalists",
|
|
24
|
+
"fpu",
|
|
25
|
+
"barometer",
|
|
26
|
+
"data-science",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://www.freepressunlimited.org"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = ["src/fpu_barometer"]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""FPU/Barometer thin researcher-facing API client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from fpu_barometer.client import FPUClient
|
|
8
|
+
from fpu_barometer.config import configure, get_config
|
|
9
|
+
from fpu_barometer.dataframe import response_to_dataframe
|
|
10
|
+
from fpu_barometer.models import (
|
|
11
|
+
DataResponse,
|
|
12
|
+
DatasetFilter,
|
|
13
|
+
DatasetListResponse,
|
|
14
|
+
DatasetStatus,
|
|
15
|
+
EnrichmentRequest,
|
|
16
|
+
EventsRequest,
|
|
17
|
+
InvalidCursorError,
|
|
18
|
+
PredictorsRequest,
|
|
19
|
+
decode_page_cursor,
|
|
20
|
+
encode_page_cursor,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__version__ = "0.3.0"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def client() -> FPUClient:
    """Build a new ``FPUClient`` with default constructor arguments."""
    new_client = FPUClient()
    return new_client
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_events(events: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any):
    """Fetch event rows using a short-lived ``FPUClient``.

    A client is opened for this single call and closed when the call
    finishes. All arguments are forwarded to ``FPUClient.get_events``.
    """
    with FPUClient() as short_lived:
        result = short_lived.get_events(events, countries, years, **kwargs)
    return result
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_predictors(predictors: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any):
    """Fetch predictor rows using a short-lived ``FPUClient``.

    A client is opened for this single call and closed when the call
    finishes. All arguments are forwarded to ``FPUClient.get_predictors``.
    """
    with FPUClient() as short_lived:
        result = short_lived.get_predictors(predictors, countries, years, **kwargs)
    return result
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def enrich_events(events: str | list[str], predictors: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any):
    """Fetch enriched event rows using a short-lived ``FPUClient``.

    A client is opened for this single call and closed when the call
    finishes. All arguments are forwarded to ``FPUClient.enrich_events``.
    """
    with FPUClient() as short_lived:
        result = short_lived.enrich_events(events, predictors, countries, years, **kwargs)
    return result
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def list_datasets() -> dict[str, Any]:
    """Return the dataset listing using a short-lived ``FPUClient``."""
    with FPUClient() as short_lived:
        listing = short_lived.list_datasets()
    return listing
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
__all__ = [
|
|
51
|
+
"FPUClient",
|
|
52
|
+
"configure",
|
|
53
|
+
"get_config",
|
|
54
|
+
"client",
|
|
55
|
+
"get_events",
|
|
56
|
+
"get_predictors",
|
|
57
|
+
"enrich_events",
|
|
58
|
+
"list_datasets",
|
|
59
|
+
"response_to_dataframe",
|
|
60
|
+
"DataResponse",
|
|
61
|
+
"DatasetFilter",
|
|
62
|
+
"DatasetListResponse",
|
|
63
|
+
"DatasetStatus",
|
|
64
|
+
"EventsRequest",
|
|
65
|
+
"PredictorsRequest",
|
|
66
|
+
"EnrichmentRequest",
|
|
67
|
+
"InvalidCursorError",
|
|
68
|
+
"encode_page_cursor",
|
|
69
|
+
"decode_page_cursor",
|
|
70
|
+
]
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Researcher-facing HTTP API client for Barometer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import warnings
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from fpu_barometer.config import FPUConfig, get_config
|
|
12
|
+
from fpu_barometer.dataframe import response_to_dataframe
|
|
13
|
+
from fpu_barometer.models import (
|
|
14
|
+
DataResponse,
|
|
15
|
+
DatasetListResponse,
|
|
16
|
+
EnrichmentRequest,
|
|
17
|
+
EventsRequest,
|
|
18
|
+
PredictorsRequest,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FPUClient:
    """Thin HTTP client.

    This class only sends HTTP requests. Backend resolution of current
    Processed Dataset Versions happens behind the API.

    Instances can be used as context managers; leaving the ``with`` block
    closes the underlying ``requests`` session.
    """

    def __init__(self, config: FPUConfig | None = None, session: requests.Session | None = None):
        """Bind the client to a configuration and an HTTP session.

        Args:
            config: Configuration to use; falls back to ``get_config()``.
            session: Session to send requests through; a new
                ``requests.Session`` is created when omitted. The JSON
                content type and, when an API key is configured, a bearer
                ``Authorization`` header are added to the session headers.
        """
        self.config = config or get_config()
        self.base_url = self.config.api_base_url
        self.session = session or requests.Session()
        headers = {"Content-Type": "application/json"}
        if self.config.api_key:
            headers["Authorization"] = f"Bearer {self.config.api_key}"
        self.session.headers.update(headers)

    def get_events(self, events: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any) -> pd.DataFrame:
        """Fetch every page of the ``events`` route as one DataFrame.

        Extra keyword arguments (for example ``limit``, ``page_size``,
        ``warn_on_unbounded``) are handled by the pagination loop or passed
        on to ``EventsRequest``.
        """
        return self._paginated_data_response(
            "events", EventsRequest,
            events=_as_list(events), countries=countries, years=years, **kwargs,
        )

    def get_predictors(self, predictors: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any) -> pd.DataFrame:
        """Fetch every page of the ``predictors`` route as one DataFrame."""
        return self._paginated_data_response(
            "predictors", PredictorsRequest,
            predictors=_as_list(predictors), countries=countries, years=years, **kwargs,
        )

    def enrich_events(self, events: str | list[str], predictors: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any) -> pd.DataFrame:
        """Fetch every page of the ``enrich`` route as one DataFrame."""
        return self._paginated_data_response(
            "enrich", EnrichmentRequest,
            events=_as_list(events), predictors=_as_list(predictors), countries=countries, years=years, **kwargs,
        )

    def list_datasets(self) -> dict[str, Any]:
        """GET the ``datasets`` route and return the validated listing as a dict."""
        return DatasetListResponse.from_dict(self._request("GET", "datasets")).to_dict()

    def _paginated_data_response(self, route: str, request_cls: type, **kwargs: Any) -> pd.DataFrame:
        """Fetch all pages for a data response endpoint, looping on next_cursor.

        Client-only options are removed from ``kwargs`` before the request
        object is built:

        * ``limit``: maximum number of rows to return (``None`` = no cap).
        * ``page_size``: rows requested per page; ``None`` or absent means
          10 000.
        * ``warn_on_unbounded``: emit a warning when more than one page is
          needed and no ``limit`` was given (default ``True``).
        """
        limit = _pop_client_limit(kwargs)
        requested_page_size = kwargs.pop("page_size", None)
        if requested_page_size is None:
            # An explicit ``page_size=None`` must behave like "not given";
            # otherwise ``min(None, remaining)`` below raises TypeError.
            requested_page_size = 10_000
        warn_on_unbounded = kwargs.pop("warn_on_unbounded", True)

        frames: list[pd.DataFrame] = []
        warned = False
        rows_so_far = 0
        next_cursor: str | None = None

        while True:
            if limit is not None:
                remaining = limit - rows_so_far
                if remaining <= 0:
                    break
                # Never ask the server for more rows than are still wanted.
                page_size = min(requested_page_size, remaining)
            else:
                page_size = requested_page_size

            payload_kwargs = dict(kwargs)
            payload_kwargs["page_size"] = page_size
            if next_cursor is not None:
                payload_kwargs["cursor"] = next_cursor

            request = request_cls.from_dict(_payload(**payload_kwargs))
            response = self.post_data_response(route, request.to_dict())
            frame = response_to_dataframe(response)
            frames.append(frame)
            rows_so_far += len(frame)

            next_cursor = response.next_cursor

            if next_cursor is None:
                break

            if limit is None and warn_on_unbounded and not warned:
                warnings.warn(
                    f"{route} is fetching {page_size} rows per page "
                    "without a client limit; this may download many rows. "
                    "Use `limit=N` to cap it, or `warn_on_unbounded=False` to suppress.",
                    # Attribute the warning to the code that called the public
                    # get_*/enrich_* method rather than to this helper.
                    stacklevel=3,
                )
                warned = True

        result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        if limit is not None:
            result = result.head(limit)
        return result

    def post_data_response(self, route: str, payload: dict[str, Any]) -> DataResponse:
        """POST ``payload`` as JSON to ``route`` and parse the reply as a ``DataResponse``."""
        return DataResponse.from_dict(self._request("POST", route, json=payload))

    def _request(self, method: str, route: str, **kwargs: Any) -> dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Raises:
            RuntimeError: if the response status code is 400 or higher.
        """
        response = self.session.request(
            method,
            f"{self.base_url}/{route.lstrip('/')}",
            timeout=self.config.timeout,
            **kwargs,
        )
        if response.status_code >= 400:
            raise RuntimeError(f"API request failed: {response.status_code} - {response.text}")
        return response.json()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "FPUClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _as_list(value: str | list[str]) -> list[str]:
|
|
133
|
+
if isinstance(value, str):
|
|
134
|
+
return [value]
|
|
135
|
+
return list(value)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _pop_client_limit(kwargs: dict[str, Any]) -> int | None:
|
|
139
|
+
if "limit" not in kwargs:
|
|
140
|
+
return None
|
|
141
|
+
limit = kwargs.pop("limit")
|
|
142
|
+
if limit is None:
|
|
143
|
+
return None
|
|
144
|
+
if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1:
|
|
145
|
+
raise ValueError("limit must be a positive integer")
|
|
146
|
+
kwargs.setdefault("page_size", min(10_000, limit))
|
|
147
|
+
return limit
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _payload(**values: Any) -> dict[str, Any]:
|
|
151
|
+
return {key: value for key, value in values.items() if value is not None}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Endpoint/auth/timeout configuration for the thin Barometer API client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import asdict, dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class FPUConfig:
    """Settings for the Barometer HTTP client (endpoint, credentials, timeout)."""

    api_endpoint: str = "https://barometer-api-prod-flex.azurewebsites.net"
    api_key: str | None = None
    timeout: int = 30
    max_retries: int = 3

    def __post_init__(self) -> None:
        """Fall back to the ``FPU_API_KEY`` environment variable when no key is set."""
        if self.api_key is not None:
            return
        self.api_key = os.getenv("FPU_API_KEY")

    @property
    def api_base_url(self) -> str:
        """Endpoint without trailing slashes, guaranteed to end in ``/api``."""
        trimmed = self.api_endpoint.rstrip("/")
        if trimmed.endswith("/api"):
            return trimmed
        return f"{trimmed}/api"

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a dict with the API key left out."""
        return {name: value for name, value in asdict(self).items() if name != "api_key"}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
_config: FPUConfig | None = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def configure(**kwargs: Any) -> FPUConfig:
    """Configure the thin HTTP API client.

    The first call creates the module-level configuration from ``kwargs``.
    Later calls update the existing configuration in place and then re-run
    ``__post_init__`` on it.

    Raises:
        ValueError: when updating an existing configuration with a name
            that is not an ``FPUConfig`` field. Nothing is modified in
            that case.
    """
    global _config
    # Backward-compatible aliases: mode/local data paths are intentionally ignored
    # because the public package is now HTTP-only.
    for legacy_key in (
        "mode",
        "data_path",
        "cache_enabled",
        "cache_path",
        "local_db_path",
        "chunk_size",
        "compression",
    ):
        kwargs.pop(legacy_key, None)

    if _config is None:
        _config = FPUConfig(**kwargs)
        return _config

    from dataclasses import fields

    # Check against the declared dataclass fields rather than hasattr():
    # hasattr also matches methods and read-only properties (to_dict,
    # api_base_url), which must not be assignable through configure().
    field_names = {spec.name for spec in fields(_config)}
    # Validate every key before touching the config so a rejected call
    # cannot leave it half-updated.
    for key in kwargs:
        if key not in field_names:
            raise ValueError(f"Unknown configuration parameter: {key}")

    for key, value in kwargs.items():
        setattr(_config, key, value)
    _config.__post_init__()
    return _config
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_config() -> FPUConfig:
    """Return the module-level configuration, creating a default one on first use."""
    global _config
    if _config is not None:
        return _config
    _config = FPUConfig()
    return _config
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def reset_config() -> None:
    """Discard the module-level configuration by setting it back to ``None``."""
    global _config
    _config = None
    return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_api_base_url() -> str:
    """Return ``api_base_url`` of the current module-level configuration."""
    active = get_config()
    return active.api_base_url
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def set_api_endpoint(endpoint: str) -> FPUConfig:
    """Set ``api_endpoint`` via ``configure`` and return the resulting configuration."""
    updated = configure(api_endpoint=endpoint)
    return updated
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def load_config_from_file(config_path: str) -> FPUConfig:
    """Load configuration values from a ``.json`` file and apply them.

    The parsed JSON object is passed to ``configure`` as keyword arguments.

    Raises:
        FileNotFoundError: if ``config_path`` does not exist.
        ValueError: if the file suffix is not ``.json``.
    """
    path = Path(config_path)
    if not path.exists():
        raise FileNotFoundError(f"Configuration file not found: {path}")
    if path.suffix != ".json":
        raise ValueError(f"Unsupported configuration file format: {path.suffix}")

    import json

    # Decode explicitly as UTF-8: relying on the locale default would
    # misread non-ASCII content on platforms whose default is not UTF-8.
    return configure(**json.loads(path.read_text(encoding="utf-8")))
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def save_config_to_file(config_path: str, config: FPUConfig | None = None) -> None:
    """Write a configuration to a ``.json`` file.

    Args:
        config_path: Destination path; must end in ``.json``. Missing
            parent directories are created.
        config: Configuration to save; defaults to ``get_config()``. The
            file contains ``config.to_dict()``.

    Raises:
        ValueError: if the file suffix is not ``.json``. No directories
            are created in that case.
    """
    import json

    path = Path(config_path)
    # Reject unsupported targets before touching the filesystem so a failed
    # call leaves no stray directories behind.
    if path.suffix != ".json":
        raise ValueError(f"Unsupported configuration file format: {path.suffix}")

    serialized = json.dumps((config or get_config()).to_dict(), indent=2)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(serialized, encoding="utf-8")
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Helpers for converting Barometer JSON/tabular API responses to pandas."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from fpu_barometer.models import DataResponse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def response_to_dataframe(
    response: DataResponse | dict[str, Any] | list[dict[str, Any]],
) -> pd.DataFrame:
    """Build a DataFrame from a DataResponse, a response dict, or a records list.

    A ``DataResponse`` contributes its ``data`` attribute, a dict its
    ``"data"`` entry (empty when absent), and anything else is handed to
    ``pd.DataFrame`` unchanged.
    """
    if isinstance(response, DataResponse):
        return pd.DataFrame(response.data)
    if isinstance(response, dict):
        return pd.DataFrame(response.get("data", []))
    return pd.DataFrame(response)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def dataframe_to_response(df: pd.DataFrame, **metadata: Any) -> DataResponse:
    """Wrap a DataFrame in the shared tabular response contract.

    The keywords ``query_time_seconds``, ``next_cursor``, ``page_size``,
    ``sort_by`` and ``sort_order`` are lifted out of ``metadata`` into the
    matching ``DataResponse`` fields; every remaining keyword is stored as
    response metadata. Each cell is passed through ``_jsonable_cell``.
    """
    reserved_names = ("query_time_seconds", "next_cursor", "page_size", "sort_by", "sort_order")
    reserved = {name: metadata.pop(name, None) for name in reserved_names}

    rows = []
    for row in df.to_dict(orient="records"):
        rows.append({column: _jsonable_cell(cell) for column, cell in row.items()})

    return DataResponse(data=rows, rows=len(rows), metadata=metadata, **reserved)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _jsonable_cell(value: Any) -> Any:
    """Convert one DataFrame cell into plain Python data.

    numpy arrays, lists, tuples and dicts are converted recursively
    (sequences become lists). Values that ``pd.isna`` reports as missing
    become ``None``. Objects exposing a callable ``item`` attribute are
    unwrapped by calling it. Anything else is returned unchanged.
    """
    if value is None:
        return None

    if _is_numpy_array(value):
        converted = value.tolist()
        if isinstance(converted, list):
            return [_jsonable_cell(element) for element in converted]
        # tolist() returned a non-list value; normalise it on its own.
        return _jsonable_cell(converted)

    if isinstance(value, (list, tuple)):
        return [_jsonable_cell(element) for element in value]

    if isinstance(value, dict):
        return {name: _jsonable_cell(element) for name, element in value.items()}

    try:
        if pd.isna(value):
            return None
    except (TypeError, ValueError):
        # The missing-value check could not be evaluated for this value;
        # continue with the remaining conversions.
        pass

    unwrap = getattr(value, "item", None)
    if not callable(unwrap):
        return value
    try:
        return unwrap()
    except (TypeError, ValueError):
        return value
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _is_numpy_array(value: Any) -> bool:
|
|
75
|
+
return (
|
|
76
|
+
type(value).__module__.startswith("numpy") and type(value).__name__ == "ndarray"
|
|
77
|
+
)
|
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
"""Shared request/response contracts for the Barometer HTTP API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import binascii
|
|
7
|
+
from dataclasses import asdict, dataclass, field
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
DatasetKind = Literal["events", "predictors"]
|
|
13
|
+
AvailabilityStatus = Literal["available", "stale", "unavailable"]
|
|
14
|
+
RunStatus = Literal["running", "success", "failed", "skipped"]
|
|
15
|
+
ExpectedCadence = Literal["static", "daily", "monthly", "annual", "ad_hoc"]
|
|
16
|
+
RefreshMode = Literal["full_refresh", "incremental"]
|
|
17
|
+
# Source types a dataset status may report. Kept in step with the runtime
# validation set ``_SOURCE_TYPES``, which also accepts "web_download".
SourceType = Literal[
    "static_file",
    "api_endpoint",
    "watched_file",
    "zipped_file_download",
    "release_asset",
    "web_download",
]
|
|
24
|
+
|
|
25
|
+
_DATASET_NAME_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
26
|
+
_ISO3_PATTERN = re.compile(r"^[A-Z]{3}$")
|
|
27
|
+
_DATASET_KINDS = {"events", "predictors"}
|
|
28
|
+
_AVAILABILITY_STATUSES = {"available", "stale", "unavailable"}
|
|
29
|
+
_RUN_STATUSES = {"running", "success", "failed", "skipped"}
|
|
30
|
+
_EXPECTED_CADENCES = {"static", "daily", "monthly", "annual", "ad_hoc"}
|
|
31
|
+
_REFRESH_MODES = {"full_refresh", "incremental"}
|
|
32
|
+
_SOURCE_TYPES = {
|
|
33
|
+
"static_file",
|
|
34
|
+
"api_endpoint",
|
|
35
|
+
"watched_file",
|
|
36
|
+
"zipped_file_download",
|
|
37
|
+
"release_asset",
|
|
38
|
+
"web_download",
|
|
39
|
+
}
|
|
40
|
+
_MAX_PAGE_SIZE = 10_000
|
|
41
|
+
_SORT_ORDERS = {"asc", "desc"}
|
|
42
|
+
_DATASET_STATUS_FIELDS = {
|
|
43
|
+
"dataset",
|
|
44
|
+
"kind",
|
|
45
|
+
"availability_status",
|
|
46
|
+
"last_run_status",
|
|
47
|
+
"last_successful_refresh",
|
|
48
|
+
"last_attempted_refresh",
|
|
49
|
+
"expected_cadence",
|
|
50
|
+
"refresh_mode",
|
|
51
|
+
"source_type",
|
|
52
|
+
"current_version_id",
|
|
53
|
+
"row_count",
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class APIContractError(ValueError):
    """Root of the errors raised for invalid public API contracts.

    ``code`` is a class-level error identifier; ``details`` is a dict
    attached to the instance (empty when none is supplied).
    """

    code = "invalid_request"

    def __init__(self, message: str, *, details: dict[str, Any] | None = None):
        super().__init__(message)
        self.details = details if details else {}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class InvalidRequestError(APIContractError):
    """Raised when a request body does not have the shape the public API expects."""

    code = "invalid_request"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class InvalidFilterError(APIContractError):
    """Raised when request filters violate the public API contract."""

    code = "invalid_filter"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class InvalidCursorError(APIContractError):
    """Raised when a Page Cursor cannot be decoded as a public API cursor."""

    code = "invalid_cursor"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(frozen=True)
class DatasetFilter:
    """Common country/year filters accepted by data requests."""

    countries: list[str] | None = None
    years: list[int] | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "DatasetFilter":
        """Build a filter from the ``countries``/``years`` keys of ``data``.

        ``None`` is treated as an empty request body. Other keys in ``data``
        are not inspected here.

        Raises:
            InvalidRequestError: if ``data`` is neither ``None`` nor a dict.
        """
        # Only None stands in for "no body". Using ``data or {}`` here would
        # also turn falsy non-objects ([], "", 0) into {} and let them slip
        # past the type check below.
        if data is None:
            data = {}
        if not isinstance(data, dict):
            raise InvalidRequestError("Request body must be a JSON object", details={})
        countries = _optional_array(data, "countries", _validate_country, InvalidFilterError)
        years = _optional_array(data, "years", _validate_year, InvalidFilterError)
        return cls(countries=countries, years=years)

    def to_dict(self) -> dict[str, Any]:
        """Return the filters that are set; ``None`` values are left out."""
        return {k: v for k, v in asdict(self).items() if v is not None}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(frozen=True)
class EventsRequest:
    """Validated request body for the events data route."""

    events: list[str]
    filters: DatasetFilter = field(default_factory=DatasetFilter)
    page_size: int = _MAX_PAGE_SIZE
    cursor: str | None = None
    sort_by: str = "date"
    sort_order: str = "desc"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EventsRequest":
        """Validate a raw request dict and build the request object.

        The checks run in a fixed order: body type, removed ``limit`` key,
        required ``events`` array, unknown keys, pagination, then filters.
        """
        _validate_object(data)
        _reject_removed_limit(data)
        dataset_names = _required_dataset_array(data, "events")
        permitted = {"events", "countries", "years", "page_size", "cursor", "sort_by", "sort_order"}
        _reject_unknown_fields(data, permitted)
        paging = _parse_pagination(data, allowed_sort_by={"date"}, default_sort_by="date")
        row_filters = DatasetFilter.from_dict(data)
        return cls(events=dataset_names, filters=row_filters, **paging)

    def to_dict(self) -> dict[str, Any]:
        """Flatten the request into one dict: datasets, then filters, then pagination."""
        body: dict[str, Any] = {"events": self.events}
        body.update(self.filters.to_dict())
        body.update(_pagination_to_dict(self.page_size, self.cursor, self.sort_by, self.sort_order))
        return body
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass(frozen=True)
class PredictorsRequest:
    """Validated request body for the predictors data route."""

    predictors: list[str]
    filters: DatasetFilter = field(default_factory=DatasetFilter)
    page_size: int = _MAX_PAGE_SIZE
    cursor: str | None = None
    sort_by: str = "year"
    sort_order: str = "desc"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PredictorsRequest":
        """Validate a raw request dict and build the request object.

        The checks run in a fixed order: body type, removed ``limit`` key,
        required ``predictors`` array, unknown keys, pagination, then filters.
        """
        _validate_object(data)
        _reject_removed_limit(data)
        dataset_names = _required_dataset_array(data, "predictors")
        permitted = {"predictors", "countries", "years", "page_size", "cursor", "sort_by", "sort_order"}
        _reject_unknown_fields(data, permitted)
        paging = _parse_pagination(data, allowed_sort_by={"year"}, default_sort_by="year")
        row_filters = DatasetFilter.from_dict(data)
        return cls(predictors=dataset_names, filters=row_filters, **paging)

    def to_dict(self) -> dict[str, Any]:
        """Flatten the request into one dict: datasets, then filters, then pagination."""
        body: dict[str, Any] = {"predictors": self.predictors}
        body.update(self.filters.to_dict())
        body.update(_pagination_to_dict(self.page_size, self.cursor, self.sort_by, self.sort_order))
        return body
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@dataclass(frozen=True)
class EnrichmentRequest:
    """Validated request body for the enrich route (events plus predictors)."""

    events: list[str]
    predictors: list[str]
    filters: DatasetFilter = field(default_factory=DatasetFilter)
    page_size: int = _MAX_PAGE_SIZE
    cursor: str | None = None
    sort_by: str = "date"
    sort_order: str = "desc"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EnrichmentRequest":
        """Validate a raw request dict and build the request object.

        The checks run in a fixed order: body type, removed ``limit`` key,
        required ``events`` and ``predictors`` arrays, unknown keys,
        pagination, then filters.
        """
        _validate_object(data)
        _reject_removed_limit(data)
        event_names = _required_dataset_array(data, "events")
        predictor_names = _required_dataset_array(data, "predictors")
        permitted = {"events", "predictors", "countries", "years", "page_size", "cursor", "sort_by", "sort_order"}
        _reject_unknown_fields(data, permitted)
        paging = _parse_pagination(data, allowed_sort_by={"date"}, default_sort_by="date")
        row_filters = DatasetFilter.from_dict(data)
        return cls(events=event_names, predictors=predictor_names, filters=row_filters, **paging)

    def to_dict(self) -> dict[str, Any]:
        """Flatten the request into one dict: datasets, then filters, then pagination."""
        body: dict[str, Any] = {"events": self.events, "predictors": self.predictors}
        body.update(self.filters.to_dict())
        body.update(_pagination_to_dict(self.page_size, self.cursor, self.sort_by, self.sort_order))
        return body
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@dataclass(frozen=True)
class DatasetStatus:
    """One entry of the public dataset listing."""

    dataset: str
    kind: DatasetKind
    availability_status: AvailabilityStatus
    last_run_status: RunStatus | None
    last_successful_refresh: str | None
    last_attempted_refresh: str | None
    expected_cadence: ExpectedCadence
    refresh_mode: RefreshMode
    source_type: SourceType
    current_version_id: str | None
    row_count: int | None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DatasetStatus":
        """Validate a raw status dict and build the status object.

        Every key in ``_DATASET_STATUS_FIELDS`` must be present and no other
        key is allowed. Field values are then validated one at a time in
        declaration order.
        """
        _validate_object(data)
        _require_fields(data, _DATASET_STATUS_FIELDS, "Dataset status")
        _reject_unknown_fields(data, _DATASET_STATUS_FIELDS)

        dataset_name = _validate_dataset_name(data["dataset"], "dataset")
        kind = _literal(data["kind"], "kind", _DATASET_KINDS)
        availability = _literal(
            data["availability_status"], "availability_status", _AVAILABILITY_STATUSES
        )
        last_run = _nullable_literal(data["last_run_status"], "last_run_status", _RUN_STATUSES)
        last_success = _nullable_str(data["last_successful_refresh"], "last_successful_refresh")
        last_attempt = _nullable_str(data["last_attempted_refresh"], "last_attempted_refresh")
        cadence = _literal(data["expected_cadence"], "expected_cadence", _EXPECTED_CADENCES)
        refresh_mode = _literal(data["refresh_mode"], "refresh_mode", _REFRESH_MODES)
        source_type = _literal(data["source_type"], "source_type", _SOURCE_TYPES)
        version_id = _nullable_str(data["current_version_id"], "current_version_id")
        row_count = _nullable_nonnegative_int(data["row_count"], "row_count")

        return cls(
            dataset=dataset_name,
            kind=kind,
            availability_status=availability,
            last_run_status=last_run,
            last_successful_refresh=last_success,
            last_attempted_refresh=last_attempt,
            expected_cadence=cadence,
            refresh_mode=refresh_mode,
            source_type=source_type,
            current_version_id=version_id,
            row_count=row_count,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict."""
        as_mapping = asdict(self)
        return as_mapping
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@dataclass(frozen=True)
class DatasetListResponse:
    """Response body of the public dataset listing route."""

    datasets: list[DatasetStatus]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DatasetListResponse":
        """Validate a raw listing dict and parse each entry as a ``DatasetStatus``.

        Raises:
            InvalidRequestError: if ``datasets`` is not a list.
        """
        _validate_object(data)
        _require_fields(data, {"datasets"}, "Dataset list response")
        _reject_unknown_fields(data, {"datasets"})
        raw_entries = data["datasets"]
        if isinstance(raw_entries, list):
            return cls(datasets=[DatasetStatus.from_dict(entry) for entry in raw_entries])
        raise InvalidRequestError(
            "datasets must be an array", details={"field": "datasets"}
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the listing with every status converted to a dict."""
        serialized = []
        for status in self.datasets:
            serialized.append(status.to_dict())
        return {"datasets": serialized}
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
@dataclass(frozen=True)
class DataResponse:
    """Tabular API response shared by client and handlers."""

    data: list[dict[str, Any]]
    rows: int | None = None
    query_time_seconds: float | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    next_cursor: str | None = None
    page_size: int | None = None
    sort_by: str | None = None
    sort_order: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DataResponse":
        """Build a response from a decoded JSON object.

        A missing or ``null`` ``data`` entry yields an empty record list,
        and a missing or ``null`` ``metadata`` entry yields empty metadata.
        Top-level keys that are not part of the contract are merged into
        ``metadata``.
        """
        known = {
            "data",
            "rows",
            "query_time_seconds",
            "metadata",
            "next_cursor",
            "page_size",
            "sort_by",
            "sort_order",
        }
        metadata = dict(data.get("metadata") or {})
        metadata.update({k: v for k, v in data.items() if k not in known})
        # Treat an explicit null like an absent key, as is done for metadata
        # above, instead of crashing in list(None).
        records = data.get("data")
        return cls(
            data=list(records) if records is not None else [],
            rows=data.get("rows"),
            query_time_seconds=data.get("query_time_seconds"),
            metadata=metadata,
            next_cursor=data.get("next_cursor"),
            page_size=data.get("page_size"),
            sort_by=data.get("sort_by"),
            sort_order=data.get("sort_order"),
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize the response; optional fields are emitted only when set.

        ``rows`` falls back to ``len(self.data)`` when it is ``None``.
        """
        payload: dict[str, Any] = {"data": self.data, "rows": self.rows if self.rows is not None else len(self.data)}
        if self.query_time_seconds is not None:
            payload["query_time_seconds"] = self.query_time_seconds
        if self.metadata:
            payload["metadata"] = self.metadata
        if self.next_cursor is not None:
            payload["next_cursor"] = self.next_cursor
        if self.page_size is not None:
            payload["page_size"] = self.page_size
        if self.sort_by is not None:
            payload["sort_by"] = self.sort_by
        if self.sort_order is not None:
            payload["sort_order"] = self.sort_order
        return payload
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def encode_page_cursor(payload: dict[str, Any]) -> str:
    """Serialise a Page Cursor payload to unpadded URL-safe base64 JSON.

    The payload is dumped as compact JSON with sorted keys, so equal
    payloads produce the same cursor string.

    Raises:
        InvalidCursorError: If ``payload`` is not a ``dict``.
    """

    if isinstance(payload, dict):
        compact = json.dumps(payload, separators=(",", ":"), sort_keys=True)
        token = base64.urlsafe_b64encode(compact.encode("utf-8"))
        # Trailing "=" padding is dropped from the emitted cursor.
        return token.rstrip(b"=").decode("ascii")
    raise InvalidCursorError("cursor payload must be a JSON object", details={"field": "cursor"})
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def decode_page_cursor(cursor: str) -> dict[str, Any]:
    """Turn an opaque Page Cursor string back into its JSON object payload.

    The cursor is re-padded to a multiple of four characters, decoded as
    URL-safe base64, and parsed as UTF-8 JSON.

    Raises:
        InvalidCursorError: If ``cursor`` is not a non-empty string, cannot
            be decoded or parsed, or does not decode to a non-empty dict.
    """

    def _invalid() -> Exception:
        # Every failure mode reports the same error.
        return InvalidCursorError("cursor is invalid", details={"field": "cursor"})

    if not (isinstance(cursor, str) and cursor):
        raise _invalid()
    try:
        missing_padding = -len(cursor) % 4
        encoded = (cursor + "=" * missing_padding).encode("ascii")
        decoded = base64.urlsafe_b64decode(encoded)
        payload = json.loads(decoded.decode("utf-8"))
    except (binascii.Error, json.JSONDecodeError, UnicodeDecodeError, ValueError):
        # ``from None`` hides the low-level decoding error from the chain.
        raise _invalid() from None
    if isinstance(payload, dict) and payload:
        return payload
    raise _invalid()
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _validate_object(data: Any) -> None:
    """Raise ``InvalidRequestError`` unless ``data`` is a ``dict``."""
    if isinstance(data, dict):
        return
    raise InvalidRequestError("Request body must be a JSON object", details={})
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _reject_removed_limit(data: dict[str, Any]) -> None:
    """Raise ``InvalidRequestError`` when the request still carries ``limit``."""
    if "limit" not in data:
        return
    raise InvalidRequestError(
        "limit is no longer accepted; use page_size and cursor",
        details={"field": "limit"},
    )
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _reject_unknown_fields(data: dict[str, Any], allowed: set[str]) -> None:
    """Raise ``InvalidRequestError`` if ``data`` has a key outside ``allowed``.

    When several keys are unknown, the first one in sorted order is reported.
    """
    extras = sorted(set(data) - allowed)
    if not extras:
        return
    offender = extras[0]
    raise InvalidRequestError(
        f"Unknown request field: {offender}",
        details={"field": offender},
    )
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _parse_pagination(
    data: dict[str, Any], *, allowed_sort_by: set[str], default_sort_by: str
) -> dict[str, int | str | None]:
    """Validate the pagination fields of a request and return them normalised.

    Fields are checked in the order ``page_size``, ``cursor``, ``sort_by``,
    ``sort_order``; the first invalid one determines the error raised.
    ``page_size`` defaults to ``_MAX_PAGE_SIZE``, ``sort_by`` to
    ``default_sort_by`` and ``sort_order`` to ``"desc"``.

    Raises:
        InvalidRequestError: If ``page_size``, ``sort_by`` or ``sort_order``
            is invalid.
        InvalidCursorError: If a supplied ``cursor`` cannot be decoded.
    """
    size = data.get("page_size", _MAX_PAGE_SIZE)
    # bool is a subclass of int, so it has to be excluded explicitly.
    size_is_valid = (
        not isinstance(size, bool)
        and isinstance(size, int)
        and 1 <= size <= _MAX_PAGE_SIZE
    )
    if not size_is_valid:
        raise InvalidRequestError(
            f"page_size must be an integer between 1 and {_MAX_PAGE_SIZE}",
            details={"field": "page_size"},
        )

    token = data.get("cursor")
    if token is not None:
        # Called for validation only; the decoded payload is discarded.
        decode_page_cursor(token)

    order_field = data.get("sort_by", default_sort_by)
    if not (isinstance(order_field, str) and order_field in allowed_sort_by):
        raise InvalidRequestError("sort_by has an unsupported value", details={"field": "sort_by"})

    direction = data.get("sort_order", "desc")
    if not (isinstance(direction, str) and direction in _SORT_ORDERS):
        raise InvalidRequestError(
            "sort_order has an unsupported value", details={"field": "sort_order"}
        )

    return dict(
        page_size=size,
        cursor=token,
        sort_by=order_field,
        sort_order=direction,
    )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _pagination_to_dict(
|
|
398
|
+
page_size: int, cursor: str | None, sort_by: str, sort_order: str
|
|
399
|
+
) -> dict[str, int | str]:
|
|
400
|
+
payload: dict[str, int | str] = {
|
|
401
|
+
"page_size": page_size,
|
|
402
|
+
"sort_by": sort_by,
|
|
403
|
+
"sort_order": sort_order,
|
|
404
|
+
}
|
|
405
|
+
if cursor is not None:
|
|
406
|
+
payload["cursor"] = cursor
|
|
407
|
+
return payload
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _require_fields(data: dict[str, Any], required: set[str], label: str) -> None:
    """Raise ``InvalidRequestError`` if any ``required`` key is absent from ``data``.

    When several keys are missing, the first one in sorted order is reported;
    ``label`` prefixes the error message.
    """
    absent = sorted(required - set(data))
    if not absent:
        return
    first_absent = absent[0]
    raise InvalidRequestError(
        f"{label} missing required field: {first_absent}",
        details={"field": first_absent},
    )
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _validate_dataset_name(value: Any, field: str) -> str:
    """Return ``value`` if it is a string fully matching ``_DATASET_NAME_PATTERN``.

    Raises:
        InvalidRequestError: Otherwise, naming ``field`` in the error.
    """
    if isinstance(value, str) and _DATASET_NAME_PATTERN.fullmatch(value):
        return value
    raise InvalidRequestError(
        f"{field} must be a safe dataset name", details={"field": field}
    )
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _literal(value: Any, field: str, allowed: set[str]) -> Any:
    """Return ``value`` if it is a string contained in ``allowed``.

    Raises:
        InvalidRequestError: Otherwise, naming ``field`` in the error.
    """
    if isinstance(value, str) and value in allowed:
        return value
    raise InvalidRequestError(
        f"{field} has an unsupported value", details={"field": field}
    )
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _nullable_literal(value: Any, field: str, allowed: set[str]) -> Any | None:
|
|
436
|
+
if value is None:
|
|
437
|
+
return None
|
|
438
|
+
return _literal(value, field, allowed)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _nullable_str(value: Any, field: str) -> str | None:
|
|
442
|
+
if value is None:
|
|
443
|
+
return None
|
|
444
|
+
if not isinstance(value, str):
|
|
445
|
+
raise InvalidRequestError(f"{field} must be a string", details={"field": field})
|
|
446
|
+
return value
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def _nullable_nonnegative_int(value: Any, field: str) -> int | None:
|
|
450
|
+
if value is None:
|
|
451
|
+
return None
|
|
452
|
+
if isinstance(value, bool) or not isinstance(value, int) or value < 0:
|
|
453
|
+
raise InvalidRequestError(
|
|
454
|
+
f"{field} must be a non-negative integer", details={"field": field}
|
|
455
|
+
)
|
|
456
|
+
return value
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _required_dataset_array(data: dict[str, Any], field: str) -> list[str]:
    """Validate ``data[field]`` with ``_dataset_array``, requiring the key to exist.

    Raises:
        InvalidRequestError: If ``field`` is absent from ``data`` (or if
            ``_dataset_array`` rejects the value).
    """
    if field in data:
        return _dataset_array(data[field], field)
    raise InvalidRequestError(f"Missing required field: {field}", details={"field": field})
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _dataset_array(value: Any, field: str) -> list[str]:
    """Return a copy of ``value`` after checking it is a list of dataset names.

    ``value`` must be a non-empty list whose items are all strings fully
    matching ``_DATASET_NAME_PATTERN``.

    Raises:
        InvalidRequestError: If ``value`` is not a non-empty list, or any
            item fails the name check.
    """
    if not (isinstance(value, list) and value):
        raise InvalidRequestError(f"{field} must be a non-empty array", details={"field": field})

    def _is_safe(name: Any) -> bool:
        return isinstance(name, str) and _DATASET_NAME_PATTERN.fullmatch(name) is not None

    # all() stops at the first offending item, like the explicit loop it replaces.
    if not all(_is_safe(entry) for entry in value):
        raise InvalidRequestError(
            f"{field} must contain safe dataset names",
            details={"field": field},
        )
    return list(value)
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _optional_array(
    data: dict[str, Any],
    field: str,
    validate_item,
    error_type: type[APIContractError],
) -> list[Any] | None:
    """Validate an optional array field, item by item.

    Returns ``None`` when ``field`` is absent from ``data``. Otherwise the
    value must be a non-empty list, and the result is the list of
    ``validate_item(item, field)`` return values.

    Raises:
        error_type: If the value is present but is not a list (including an
            explicit ``None``) or is an empty list.
    """
    if field not in data:
        return None
    items = data[field]
    # None is not a list, so a single isinstance check covers both cases
    # with the same error.
    if not isinstance(items, list):
        raise error_type(f"{field} must be an array", details={"field": field})
    if not items:
        raise error_type(f"{field} must be a non-empty array", details={"field": field})
    return [validate_item(entry, field) for entry in items]
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _validate_country(value: Any, field: str) -> str:
    """Return ``value`` if it is a string fully matching ``_ISO3_PATTERN``.

    Raises:
        InvalidFilterError: Otherwise. The message always refers to
            "countries"; ``field`` is reported only in ``details``.
    """
    if isinstance(value, str) and _ISO3_PATTERN.fullmatch(value):
        return value
    raise InvalidFilterError("countries must be ISO3 country codes", details={"field": field})
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _validate_year(value: Any, field: str) -> int:
    """Return ``value`` if it is an integer; booleans are not accepted.

    Raises:
        InvalidFilterError: Otherwise. The message always refers to
            "years"; ``field`` is reported only in ``details``.
    """
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    raise InvalidFilterError("years must be integers", details={"field": field})
|