fpu-barometer 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ data/*
2
+ .env
3
+ .pytest_cache/
4
+ .venv/
5
+ __pycache__/
6
+ *.pyc
7
+ *.egg-info/
8
+ *.whl
9
+ .ruff_cache/
@@ -0,0 +1,7 @@
1
+ Copyright 2026 Free Press Unlimited
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: fpu-barometer
3
+ Version: 0.3.0
4
+ Summary: Free Press Unlimited researcher-facing API client
5
+ Project-URL: Homepage, https://www.freepressunlimited.org
6
+ Author: Phillip Kersten, Jannes Kelso, Jos Bartman
7
+ License-Expression: MIT
8
+ License-File: LICENSE.md
9
+ Keywords: barometer,data-science,fpu,journalists,press
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: pandas>=2.0.0
12
+ Requires-Dist: requests>=2.28.0
13
+ Description-Content-Type: text/markdown
14
+
15
+ # fpu-barometer
16
+
17
+ Lightweight researcher-facing API client for Barometer.
@@ -0,0 +1,3 @@
1
+ # fpu-barometer
2
+
3
+ Lightweight researcher-facing API client for Barometer.
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "fpu-barometer"
7
+ version = "0.3.0"
8
+ description = "Free Press Unlimited researcher-facing API client"
9
+ authors = [
10
+ {name = "Phillip Kersten"},
11
+ {name = "Jannes Kelso"},
12
+ {name = "Jos Bartman"},
13
+ ]
14
+ readme = "README.md"
15
+ requires-python = ">=3.10"
16
+ license = "MIT"
17
+ dependencies = [
18
+ "pandas>=2.0.0",
19
+ "requests>=2.28.0",
20
+ ]
21
+ keywords = [
22
+ "press",
23
+ "journalists",
24
+ "fpu",
25
+ "barometer",
26
+ "data-science",
27
+ ]
28
+
29
+ [project.urls]
30
+ Homepage = "https://www.freepressunlimited.org"
31
+
32
+ [tool.hatch.build.targets.wheel]
33
+ packages = ["src/fpu_barometer"]
@@ -0,0 +1,70 @@
1
+ """FPU/Barometer thin researcher-facing API client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from fpu_barometer.client import FPUClient
8
+ from fpu_barometer.config import configure, get_config
9
+ from fpu_barometer.dataframe import response_to_dataframe
10
+ from fpu_barometer.models import (
11
+ DataResponse,
12
+ DatasetFilter,
13
+ DatasetListResponse,
14
+ DatasetStatus,
15
+ EnrichmentRequest,
16
+ EventsRequest,
17
+ InvalidCursorError,
18
+ PredictorsRequest,
19
+ decode_page_cursor,
20
+ encode_page_cursor,
21
+ )
22
+
23
+ __version__ = "0.3.0"
24
+
25
+
26
def client() -> FPUClient:
    """Build a new :class:`FPUClient` with default constructor arguments."""
    api = FPUClient()
    return api
28
+
29
+
30
def get_events(
    events: str | list[str],
    countries: list[str] | None = None,
    years: list[int] | None = None,
    **kwargs: Any,
):
    """Fetch events through a short-lived client.

    A fresh ``FPUClient`` is opened for this one call and closed before the
    result is handed back. All arguments are forwarded unchanged to
    ``FPUClient.get_events``.
    """
    with FPUClient() as api:
        result = api.get_events(events, countries, years, **kwargs)
    return result
33
+
34
+
35
def get_predictors(
    predictors: str | list[str],
    countries: list[str] | None = None,
    years: list[int] | None = None,
    **kwargs: Any,
):
    """Fetch predictors through a short-lived client.

    A fresh ``FPUClient`` is opened for this one call and closed before the
    result is handed back. All arguments are forwarded unchanged to
    ``FPUClient.get_predictors``.
    """
    with FPUClient() as api:
        result = api.get_predictors(predictors, countries, years, **kwargs)
    return result
38
+
39
+
40
def enrich_events(
    events: str | list[str],
    predictors: str | list[str],
    countries: list[str] | None = None,
    years: list[int] | None = None,
    **kwargs: Any,
):
    """Fetch events enriched with predictors through a short-lived client.

    A fresh ``FPUClient`` is opened for this one call and closed before the
    result is handed back. All arguments are forwarded unchanged to
    ``FPUClient.enrich_events``.
    """
    with FPUClient() as api:
        result = api.enrich_events(events, predictors, countries, years, **kwargs)
    return result
43
+
44
+
45
def list_datasets() -> dict[str, Any]:
    """Return the dataset listing using a short-lived client.

    The client is closed before the listing is returned.
    """
    with FPUClient() as api:
        listing = api.list_datasets()
    return listing
48
+
49
+
50
+ __all__ = [
51
+ "FPUClient",
52
+ "configure",
53
+ "get_config",
54
+ "client",
55
+ "get_events",
56
+ "get_predictors",
57
+ "enrich_events",
58
+ "list_datasets",
59
+ "response_to_dataframe",
60
+ "DataResponse",
61
+ "DatasetFilter",
62
+ "DatasetListResponse",
63
+ "DatasetStatus",
64
+ "EventsRequest",
65
+ "PredictorsRequest",
66
+ "EnrichmentRequest",
67
+ "InvalidCursorError",
68
+ "encode_page_cursor",
69
+ "decode_page_cursor",
70
+ ]
@@ -0,0 +1,151 @@
1
+ """Researcher-facing HTTP API client for Barometer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import warnings
6
+ from typing import Any
7
+
8
+ import pandas as pd
9
+ import requests
10
+
11
+ from fpu_barometer.config import FPUConfig, get_config
12
+ from fpu_barometer.dataframe import response_to_dataframe
13
+ from fpu_barometer.models import (
14
+ DataResponse,
15
+ DatasetListResponse,
16
+ EnrichmentRequest,
17
+ EventsRequest,
18
+ PredictorsRequest,
19
+ )
20
+
21
+
22
class FPUClient:
    """Thin HTTP client.

    This class only sends HTTP requests. Backend resolution of current
    Processed Dataset Versions happens behind the API.

    The client is a context manager: leaving the ``with`` block closes the
    underlying session.
    """

    def __init__(self, config: FPUConfig | None = None, session: requests.Session | None = None):
        """Bind the client to a configuration and an HTTP session.

        Args:
            config: Settings to use; the package-wide configuration when omitted.
            session: Session used to send requests; a new ``requests.Session``
                when omitted. Its default headers are updated in place.
        """
        self.config = config or get_config()
        self.base_url = self.config.api_base_url
        self.session = session or requests.Session()
        headers = {"Content-Type": "application/json"}
        if self.config.api_key:
            headers["Authorization"] = f"Bearer {self.config.api_key}"
        self.session.headers.update(headers)

    def get_events(self, events: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any):
        """Fetch all pages of the ``events`` route as one DataFrame.

        A single dataset name is accepted in place of a list. Extra keyword
        arguments are handled by ``_paginated_data_response``.
        """
        return self._paginated_data_response(
            "events", EventsRequest,
            events=_as_list(events), countries=countries, years=years, **kwargs,
        )

    def get_predictors(self, predictors: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any):
        """Fetch all pages of the ``predictors`` route as one DataFrame.

        A single dataset name is accepted in place of a list. Extra keyword
        arguments are handled by ``_paginated_data_response``.
        """
        return self._paginated_data_response(
            "predictors", PredictorsRequest,
            predictors=_as_list(predictors), countries=countries, years=years, **kwargs,
        )

    def enrich_events(self, events: str | list[str], predictors: str | list[str], countries: list[str] | None = None, years: list[int] | None = None, **kwargs: Any):
        """Fetch all pages of the ``enrich`` route as one DataFrame.

        Single dataset names are accepted in place of lists. Extra keyword
        arguments are handled by ``_paginated_data_response``.
        """
        return self._paginated_data_response(
            "enrich", EnrichmentRequest,
            events=_as_list(events), predictors=_as_list(predictors), countries=countries, years=years, **kwargs,
        )

    def list_datasets(self) -> dict[str, Any]:
        """GET the ``datasets`` route and return the validated listing as a dict."""
        return DatasetListResponse.from_dict(self._request("GET", "datasets")).to_dict()

    def _paginated_data_response(self, route: str, request_cls: type, **kwargs: Any) -> pd.DataFrame:
        """Fetch all pages for a data response endpoint, looping on next_cursor.

        Client-only keyword arguments (removed before the request is built):
            limit: Maximum number of rows to return; ``None`` means no cap.
            page_size: Rows requested per page; defaults to 10,000.
            warn_on_unbounded: Emit a warning when more than one page is
                fetched without a ``limit`` (default ``True``).

        Every other keyword argument is passed to ``request_cls.from_dict``
        after ``None`` values have been dropped.

        Raises:
            ValueError: ``limit`` is not a positive integer.
            RuntimeError: The API answered with an HTTP error status, or
                returned the same cursor it was just sent.
        """
        limit = _pop_client_limit(kwargs)
        requested_page_size = kwargs.pop("page_size", None)
        if requested_page_size is None:
            # An explicit ``page_size=None`` means "use the default"; leaving
            # it as None would crash in ``min()`` below whenever a limit is set.
            requested_page_size = 10_000
        warn_on_unbounded = kwargs.pop("warn_on_unbounded", True)

        frames: list[pd.DataFrame] = []
        warned = False
        rows_so_far = 0
        next_cursor: str | None = None

        while True:
            if limit is not None:
                remaining = limit - rows_so_far
                if remaining <= 0:
                    break
                # Never ask for more rows than are still needed.
                page_size = min(requested_page_size, remaining)
            else:
                page_size = requested_page_size

            payload_kwargs = dict(kwargs)
            payload_kwargs["page_size"] = page_size
            if next_cursor is not None:
                payload_kwargs["cursor"] = next_cursor
            sent_cursor = payload_kwargs.get("cursor")

            request = request_cls.from_dict(_payload(**payload_kwargs))
            response = self.post_data_response(route, request.to_dict())
            frame = response_to_dataframe(response)
            frames.append(frame)
            rows_so_far += len(frame)

            next_cursor = response.next_cursor

            if next_cursor is None:
                break

            if next_cursor == sent_cursor:
                # Re-sending the same cursor would request the same page
                # again and again; fail loudly instead of looping forever.
                raise RuntimeError(
                    f"API pagination for {route} did not advance: "
                    "next_cursor equals the cursor that was sent"
                )

            if limit is None and warn_on_unbounded and not warned:
                warnings.warn(
                    f"{route} is fetching {page_size} rows per page "
                    f"without a client limit; this may download many rows. "
                    f"Use `limit=N` to cap it, or `warn_on_unbounded=False` to suppress.",
                    # Attribute the warning to the caller of the public
                    # get_*/enrich_* method rather than to this helper.
                    stacklevel=3,
                )
                warned = True

        result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        if limit is not None:
            # A page may hold more rows than requested; enforce the cap.
            result = result.head(limit)
        return result

    def post_data_response(self, route: str, payload: dict[str, Any]) -> DataResponse:
        """POST ``payload`` as JSON to ``route`` and parse the reply as a DataResponse."""
        return DataResponse.from_dict(self._request("POST", route, json=payload))

    def _request(self, method: str, route: str, **kwargs: Any) -> dict[str, Any]:
        """Send one request and return the decoded JSON body.

        ``route`` is appended to the base URL with any leading slash removed.
        The configured timeout is always applied.

        Raises:
            RuntimeError: The response status code is 400 or higher.
        """
        response = self.session.request(
            method,
            f"{self.base_url}/{route.lstrip('/')}",
            timeout=self.config.timeout,
            **kwargs,
        )
        if response.status_code >= 400:
            raise RuntimeError(f"API request failed: {response.status_code} - {response.text}")
        return response.json()

    def close(self) -> None:
        """Close the underlying session."""
        self.session.close()

    def __enter__(self) -> "FPUClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()
130
+
131
+
132
+ def _as_list(value: str | list[str]) -> list[str]:
133
+ if isinstance(value, str):
134
+ return [value]
135
+ return list(value)
136
+
137
+
138
+ def _pop_client_limit(kwargs: dict[str, Any]) -> int | None:
139
+ if "limit" not in kwargs:
140
+ return None
141
+ limit = kwargs.pop("limit")
142
+ if limit is None:
143
+ return None
144
+ if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1:
145
+ raise ValueError("limit must be a positive integer")
146
+ kwargs.setdefault("page_size", min(10_000, limit))
147
+ return limit
148
+
149
+
150
+ def _payload(**values: Any) -> dict[str, Any]:
151
+ return {key: value for key, value in values.items() if value is not None}
@@ -0,0 +1,98 @@
1
+ """Endpoint/auth/timeout configuration for the thin Barometer API client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import asdict, dataclass
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
@dataclass
class FPUConfig:
    """Researcher-facing API client configuration."""

    # Root URL of the API; ``api_base_url`` derives the ``/api`` base from it.
    api_endpoint: str = "https://barometer-api-prod-flex.azurewebsites.net"
    # Bearer token; filled from the FPU_API_KEY environment variable when None.
    api_key: str | None = None
    # Passed as the ``timeout`` argument of every HTTP request.
    timeout: int = 30
    # NOTE(review): not read by any code visible in this package — confirm
    # whether retries are meant to be implemented.
    max_retries: int = 3

    def __post_init__(self) -> None:
        # Fall back to the environment only when no key was given explicitly.
        if self.api_key is None:
            self.api_key = os.getenv("FPU_API_KEY")

    @property
    def api_base_url(self) -> str:
        """Endpoint without trailing slashes, guaranteed to end in ``/api``."""
        base_url = self.api_endpoint.rstrip("/")
        return base_url if base_url.endswith("/api") else f"{base_url}/api"

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a dict, with the API key removed."""
        data = asdict(self)
        data.pop("api_key", None)
        return data


_config: FPUConfig | None = None


def configure(**kwargs: Any) -> FPUConfig:
    """Configure the thin HTTP API client.

    The first call creates the package-wide ``FPUConfig``; later calls update
    that same object in place. Updates are all-or-nothing: every key is
    checked before any value is assigned.

    Args:
        **kwargs: ``FPUConfig`` field values. Legacy local-mode options
            (``mode``, ``data_path``, ``cache_enabled``, ``cache_path``,
            ``local_db_path``, ``chunk_size``, ``compression``) are accepted
            and ignored.

    Returns:
        The package-wide configuration object.

    Raises:
        TypeError: First call with a keyword ``FPUConfig`` does not accept.
        ValueError: Later call with a key that is not an ``FPUConfig`` field.
    """
    # Function-scope import keeps this block self-contained.
    from dataclasses import fields

    global _config
    # Backward-compatible aliases: mode/local data paths are intentionally ignored
    # because the public package is now HTTP-only.
    for legacy_key in (
        "mode",
        "data_path",
        "cache_enabled",
        "cache_path",
        "local_db_path",
        "chunk_size",
        "compression",
    ):
        kwargs.pop(legacy_key, None)

    if _config is None:
        _config = FPUConfig(**kwargs)
        return _config

    # Only real dataclass fields are settable. ``hasattr`` would also accept
    # methods and properties (``to_dict``, ``api_base_url``, ...), letting a
    # caller overwrite a method or hit an AttributeError mid-update.
    known = {spec.name for spec in fields(_config)}
    for key in kwargs:
        if key not in known:
            raise ValueError(f"Unknown configuration parameter: {key}")
    for key, value in kwargs.items():
        setattr(_config, key, value)
    # Re-apply the environment fallback in case api_key is now None.
    _config.__post_init__()
    return _config
60
+
61
+
62
def get_config() -> FPUConfig:
    """Return the package-wide configuration, creating a default one on first use."""
    global _config
    if _config is not None:
        return _config
    _config = FPUConfig()
    return _config
67
+
68
+
69
def reset_config() -> None:
    """Discard the package-wide configuration.

    The next ``get_config()`` or ``configure()`` call builds a new one.
    """
    global _config
    _config = None
72
+
73
+
74
def get_api_base_url() -> str:
    """Return ``api_base_url`` of the package-wide configuration."""
    active = get_config()
    return active.api_base_url
76
+
77
+
78
def set_api_endpoint(endpoint: str) -> FPUConfig:
    """Set ``api_endpoint`` via ``configure`` and return the resulting configuration."""
    updated = configure(api_endpoint=endpoint)
    return updated
80
+
81
+
82
def load_config_from_file(config_path: str) -> FPUConfig:
    """Apply configuration stored in a JSON file.

    The file's top-level object is passed to ``configure`` as keyword
    arguments.

    Args:
        config_path: Path to a ``.json`` file.

    Returns:
        The configuration returned by ``configure``.

    Raises:
        FileNotFoundError: The path does not exist.
        ValueError: The file suffix is not ``.json``.
    """
    path = Path(config_path)
    if not path.exists():
        raise FileNotFoundError(f"Configuration file not found: {path}")
    if path.suffix != ".json":
        raise ValueError(f"Unsupported configuration file format: {path.suffix}")

    import json

    # JSON is UTF-8 by specification; the locale default (e.g. cp1252 on
    # Windows) would mis-decode non-ASCII values.
    settings = json.loads(path.read_text(encoding="utf-8"))
    return configure(**settings)
90
+
91
+
92
def save_config_to_file(config_path: str, config: FPUConfig | None = None) -> None:
    """Write a configuration to a JSON file.

    The content is ``config.to_dict()`` rendered with two-space indentation.
    Missing parent directories are created.

    Args:
        config_path: Destination path; must end in ``.json``.
        config: Configuration to save; the package-wide one when omitted.

    Raises:
        ValueError: The file suffix is not ``.json``. Nothing is created on
            disk in that case.
    """
    path = Path(config_path)
    # Validate before touching the filesystem so a rejected path leaves no
    # stray directories behind.
    if path.suffix != ".json":
        raise ValueError(f"Unsupported configuration file format: {path.suffix}")

    import json

    path.parent.mkdir(parents=True, exist_ok=True)
    settings = (config or get_config()).to_dict()
    path.write_text(json.dumps(settings, indent=2), encoding="utf-8")
@@ -0,0 +1,77 @@
1
+ """Helpers for converting Barometer JSON/tabular API responses to pandas."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import pandas as pd
8
+
9
+ from fpu_barometer.models import DataResponse
10
+
11
+
12
def response_to_dataframe(
    response: DataResponse | dict[str, Any] | list[dict[str, Any]],
) -> pd.DataFrame:
    """Build a DataFrame from a DataResponse, a response dict, or a list of records.

    For a ``DataResponse`` the ``data`` attribute is used; for a dict the
    ``"data"`` key (an empty list when absent); anything else is handed to
    ``pandas.DataFrame`` as-is.
    """
    if isinstance(response, DataResponse):
        return pd.DataFrame(response.data)
    if isinstance(response, dict):
        return pd.DataFrame(response.get("data", []))
    return pd.DataFrame(response)
23
+
24
+
25
def dataframe_to_response(df: pd.DataFrame, **metadata: Any) -> DataResponse:
    """Wrap a DataFrame in the shared tabular response contract.

    The keywords ``query_time_seconds``, ``next_cursor``, ``page_size``,
    ``sort_by`` and ``sort_order`` become the response attributes of the same
    name (``None`` when not given). Any remaining keywords are stored as the
    response ``metadata``. Every cell is passed through ``_jsonable_cell``.
    """
    envelope_fields = (
        "query_time_seconds",
        "next_cursor",
        "page_size",
        "sort_by",
        "sort_order",
    )
    envelope = {name: metadata.pop(name, None) for name in envelope_fields}

    records = []
    for row in df.to_dict(orient="records"):
        records.append({column: _jsonable_cell(cell) for column, cell in row.items()})

    return DataResponse(data=records, rows=len(records), metadata=metadata, **envelope)
46
+
47
+
48
+ def _jsonable_cell(value: Any) -> Any:
49
+ if value is None:
50
+ return None
51
+ if _is_numpy_array(value):
52
+ items = value.tolist()
53
+ if not isinstance(items, list):
54
+ return _jsonable_cell(items)
55
+ return [_jsonable_cell(item) for item in items]
56
+ if isinstance(value, (list, tuple)):
57
+ return [_jsonable_cell(item) for item in value]
58
+ if isinstance(value, dict):
59
+ return {key: _jsonable_cell(item) for key, item in value.items()}
60
+ try:
61
+ if pd.isna(value):
62
+ return None
63
+ except (TypeError, ValueError):
64
+ pass
65
+ item = getattr(value, "item", None)
66
+ if callable(item):
67
+ try:
68
+ return item()
69
+ except (TypeError, ValueError):
70
+ pass
71
+ return value
72
+
73
+
74
+ def _is_numpy_array(value: Any) -> bool:
75
+ return (
76
+ type(value).__module__.startswith("numpy") and type(value).__name__ == "ndarray"
77
+ )
@@ -0,0 +1,501 @@
1
+ """Shared request/response contracts for the Barometer HTTP API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import binascii
7
+ from dataclasses import asdict, dataclass, field
8
+ import json
9
+ import re
10
+ from typing import Any, Literal
11
+
12
+ DatasetKind = Literal["events", "predictors"]
13
+ AvailabilityStatus = Literal["available", "stale", "unavailable"]
14
+ RunStatus = Literal["running", "success", "failed", "skipped"]
15
+ ExpectedCadence = Literal["static", "daily", "monthly", "annual", "ad_hoc"]
16
+ RefreshMode = Literal["full_refresh", "incremental"]
17
+ SourceType = Literal[
18
+ "static_file",
19
+ "api_endpoint",
20
+ "watched_file",
21
+ "zipped_file_download",
22
+ "release_asset",
23
+ ]
24
+
25
+ _DATASET_NAME_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
26
+ _ISO3_PATTERN = re.compile(r"^[A-Z]{3}$")
27
+ _DATASET_KINDS = {"events", "predictors"}
28
+ _AVAILABILITY_STATUSES = {"available", "stale", "unavailable"}
29
+ _RUN_STATUSES = {"running", "success", "failed", "skipped"}
30
+ _EXPECTED_CADENCES = {"static", "daily", "monthly", "annual", "ad_hoc"}
31
+ _REFRESH_MODES = {"full_refresh", "incremental"}
32
+ _SOURCE_TYPES = {
33
+ "static_file",
34
+ "api_endpoint",
35
+ "watched_file",
36
+ "zipped_file_download",
37
+ "release_asset",
38
+ "web_download",
39
+ }
40
+ _MAX_PAGE_SIZE = 10_000
41
+ _SORT_ORDERS = {"asc", "desc"}
42
+ _DATASET_STATUS_FIELDS = {
43
+ "dataset",
44
+ "kind",
45
+ "availability_status",
46
+ "last_run_status",
47
+ "last_successful_refresh",
48
+ "last_attempted_refresh",
49
+ "expected_cadence",
50
+ "refresh_mode",
51
+ "source_type",
52
+ "current_version_id",
53
+ "row_count",
54
+ }
55
+
56
+
57
class APIContractError(ValueError):
    """Root of the errors raised for invalid public API contracts.

    ``code`` is a class-level string that subclasses override; ``details``
    is a per-instance dict (empty when none is supplied).
    """

    code = "invalid_request"

    def __init__(self, message: str, *, details: dict[str, Any] | None = None):
        super().__init__(message)
        self.details = details if details else {}
65
+
66
+
67
class InvalidRequestError(APIContractError):
    """Raised when a body's shape does not match the public API contract."""

    code = "invalid_request"
71
+
72
+
73
class InvalidFilterError(APIContractError):
    """Raised when request filters do not match the public API contract."""

    code = "invalid_filter"
77
+
78
+
79
class InvalidCursorError(APIContractError):
    """Raised when a Page Cursor cannot be decoded as a public API cursor."""

    code = "invalid_cursor"
83
+
84
+
85
@dataclass(frozen=True)
class DatasetFilter:
    """Country and year filters shared by all data requests."""

    countries: list[str] | None = None
    years: list[int] | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "DatasetFilter":
        """Read ``countries`` and ``years`` from a request dict.

        A falsy ``data`` is treated as an empty dict. Other keys are ignored.

        Raises:
            InvalidRequestError: ``data`` is truthy but not a dict.
            InvalidFilterError: A filter is present but invalid.
        """
        source = data or {}
        if not isinstance(source, dict):
            raise InvalidRequestError("Request body must be a JSON object", details={})
        return cls(
            countries=_optional_array(source, "countries", _validate_country, InvalidFilterError),
            years=_optional_array(source, "years", _validate_year, InvalidFilterError),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return only the filters that are set."""
        present: dict[str, Any] = {}
        for name, value in asdict(self).items():
            if value is not None:
                present[name] = value
        return present
103
+
104
+
105
@dataclass(frozen=True)
class EventsRequest:
    """Validated body of an events data request."""

    events: list[str]
    filters: DatasetFilter = field(default_factory=DatasetFilter)
    page_size: int = _MAX_PAGE_SIZE
    cursor: str | None = None
    sort_by: str = "date"
    sort_order: str = "desc"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EventsRequest":
        """Validate a flat request dict and build the request from it.

        Checks run in this order: object shape, removed ``limit`` field,
        ``events``, unknown fields, pagination, then filters.
        """
        _validate_object(data)
        _reject_removed_limit(data)
        event_names = _required_dataset_array(data, "events")
        accepted = {"events", "countries", "years", "page_size", "cursor", "sort_by", "sort_order"}
        _reject_unknown_fields(data, accepted)
        paging = _parse_pagination(data, allowed_sort_by={"date"}, default_sort_by="date")
        filters = DatasetFilter.from_dict(data)
        return cls(events=event_names, filters=filters, **paging)

    def to_dict(self) -> dict[str, Any]:
        """Return the flat request dict: events, then filters, then pagination."""
        payload: dict[str, Any] = {"events": self.events}
        payload.update(self.filters.to_dict())
        payload.update(
            _pagination_to_dict(self.page_size, self.cursor, self.sort_by, self.sort_order)
        )
        return payload
129
+
130
+
131
@dataclass(frozen=True)
class PredictorsRequest:
    """Validated body of a predictors data request."""

    predictors: list[str]
    filters: DatasetFilter = field(default_factory=DatasetFilter)
    page_size: int = _MAX_PAGE_SIZE
    cursor: str | None = None
    sort_by: str = "year"
    sort_order: str = "desc"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PredictorsRequest":
        """Validate a flat request dict and build the request from it.

        Checks run in this order: object shape, removed ``limit`` field,
        ``predictors``, unknown fields, pagination, then filters.
        """
        _validate_object(data)
        _reject_removed_limit(data)
        predictor_names = _required_dataset_array(data, "predictors")
        accepted = {"predictors", "countries", "years", "page_size", "cursor", "sort_by", "sort_order"}
        _reject_unknown_fields(data, accepted)
        paging = _parse_pagination(data, allowed_sort_by={"year"}, default_sort_by="year")
        filters = DatasetFilter.from_dict(data)
        return cls(predictors=predictor_names, filters=filters, **paging)

    def to_dict(self) -> dict[str, Any]:
        """Return the flat request dict: predictors, then filters, then pagination."""
        payload: dict[str, Any] = {"predictors": self.predictors}
        payload.update(self.filters.to_dict())
        payload.update(
            _pagination_to_dict(self.page_size, self.cursor, self.sort_by, self.sort_order)
        )
        return payload
155
+
156
+
157
@dataclass(frozen=True)
class EnrichmentRequest:
    """Validated body of an enrichment (events plus predictors) request."""

    events: list[str]
    predictors: list[str]
    filters: DatasetFilter = field(default_factory=DatasetFilter)
    page_size: int = _MAX_PAGE_SIZE
    cursor: str | None = None
    sort_by: str = "date"
    sort_order: str = "desc"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EnrichmentRequest":
        """Validate a flat request dict and build the request from it.

        Checks run in this order: object shape, removed ``limit`` field,
        ``events``, ``predictors``, unknown fields, pagination, then filters.
        """
        _validate_object(data)
        _reject_removed_limit(data)
        event_names = _required_dataset_array(data, "events")
        predictor_names = _required_dataset_array(data, "predictors")
        accepted = {"events", "predictors", "countries", "years", "page_size", "cursor", "sort_by", "sort_order"}
        _reject_unknown_fields(data, accepted)
        paging = _parse_pagination(data, allowed_sort_by={"date"}, default_sort_by="date")
        filters = DatasetFilter.from_dict(data)
        return cls(events=event_names, predictors=predictor_names, filters=filters, **paging)

    def to_dict(self) -> dict[str, Any]:
        """Return the flat request dict: datasets, then filters, then pagination."""
        payload: dict[str, Any] = {"events": self.events, "predictors": self.predictors}
        payload.update(self.filters.to_dict())
        payload.update(
            _pagination_to_dict(self.page_size, self.cursor, self.sort_by, self.sort_order)
        )
        return payload
189
+
190
+
191
@dataclass(frozen=True)
class DatasetStatus:
    """One Dataset Status item from the dataset listing endpoint."""

    dataset: str
    kind: DatasetKind
    availability_status: AvailabilityStatus
    last_run_status: RunStatus | None
    last_successful_refresh: str | None
    last_attempted_refresh: str | None
    expected_cadence: ExpectedCadence
    refresh_mode: RefreshMode
    source_type: SourceType
    current_version_id: str | None
    row_count: int | None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DatasetStatus":
        """Validate a status dict and build the item from it.

        The dict must contain exactly the keys in ``_DATASET_STATUS_FIELDS``.
        Values are checked one at a time in field order, so the first invalid
        field is the one reported.

        Raises:
            InvalidRequestError: On any shape or value violation.
        """
        _validate_object(data)
        _require_fields(data, _DATASET_STATUS_FIELDS, "Dataset status")
        _reject_unknown_fields(data, _DATASET_STATUS_FIELDS)

        dataset = _validate_dataset_name(data["dataset"], "dataset")
        kind = _literal(data["kind"], "kind", _DATASET_KINDS)
        availability = _literal(
            data["availability_status"], "availability_status", _AVAILABILITY_STATUSES
        )
        last_run = _nullable_literal(data["last_run_status"], "last_run_status", _RUN_STATUSES)
        last_success = _nullable_str(data["last_successful_refresh"], "last_successful_refresh")
        last_attempt = _nullable_str(data["last_attempted_refresh"], "last_attempted_refresh")
        cadence = _literal(data["expected_cadence"], "expected_cadence", _EXPECTED_CADENCES)
        refresh_mode = _literal(data["refresh_mode"], "refresh_mode", _REFRESH_MODES)
        source_type = _literal(data["source_type"], "source_type", _SOURCE_TYPES)
        version_id = _nullable_str(data["current_version_id"], "current_version_id")
        row_count = _nullable_nonnegative_int(data["row_count"], "row_count")

        return cls(
            dataset=dataset,
            kind=kind,
            availability_status=availability,
            last_run_status=last_run,
            last_successful_refresh=last_success,
            last_attempted_refresh=last_attempt,
            expected_cadence=cadence,
            refresh_mode=refresh_mode,
            source_type=source_type,
            current_version_id=version_id,
            row_count=row_count,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict."""
        return asdict(self)
243
+
244
+
245
@dataclass(frozen=True)
class DatasetListResponse:
    """Body of the public dataset listing endpoint."""

    datasets: list[DatasetStatus]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DatasetListResponse":
        """Validate a listing dict and build the response from it.

        The dict must contain the ``datasets`` key and nothing else; each
        entry is parsed with ``DatasetStatus.from_dict``.

        Raises:
            InvalidRequestError: On any shape violation.
        """
        _validate_object(data)
        _require_fields(data, {"datasets"}, "Dataset list response")
        _reject_unknown_fields(data, {"datasets"})
        entries = data["datasets"]
        if isinstance(entries, list):
            return cls(datasets=[DatasetStatus.from_dict(entry) for entry in entries])
        raise InvalidRequestError(
            "datasets must be an array", details={"field": "datasets"}
        )

    def to_dict(self) -> dict[str, Any]:
        """Return ``{"datasets": [...]}`` with every item converted to a dict."""
        serialized = []
        for item in self.datasets:
            serialized.append(item.to_dict())
        return {"datasets": serialized}
265
+
266
+
267
@dataclass(frozen=True)
class DataResponse:
    """Tabular API response shared by client and handlers."""

    # Records of the current page, one dict per row.
    data: list[dict[str, Any]]
    rows: int | None = None
    query_time_seconds: float | None = None
    # Also collects any top-level response keys this class does not model.
    metadata: dict[str, Any] = field(default_factory=dict)
    # ``None`` stops the client's pagination loop.
    next_cursor: str | None = None
    page_size: int | None = None
    sort_by: str | None = None
    sort_order: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DataResponse":
        """Build a response from a decoded JSON object.

        Top-level keys not modelled as attributes are merged into
        ``metadata`` (overriding same-named entries of a ``metadata`` object
        in the payload). A missing or ``null`` ``data`` entry yields an
        empty record list.
        """
        known = {
            "data",
            "rows",
            "query_time_seconds",
            "metadata",
            "next_cursor",
            "page_size",
            "sort_by",
            "sort_order",
        }
        metadata = dict(data.get("metadata") or {})
        metadata.update({k: v for k, v in data.items() if k not in known})
        return cls(
            # ``or []`` also covers an explicit ``"data": null``, which would
            # otherwise make ``list(None)`` raise TypeError.
            data=list(data.get("data") or []),
            rows=data.get("rows"),
            query_time_seconds=data.get("query_time_seconds"),
            metadata=metadata,
            next_cursor=data.get("next_cursor"),
            page_size=data.get("page_size"),
            sort_by=data.get("sort_by"),
            sort_order=data.get("sort_order"),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the response as a dict.

        ``data`` and ``rows`` are always present; ``rows`` falls back to the
        number of records when unset. Other attributes appear only when set
        (``metadata`` only when non-empty).
        """
        payload: dict[str, Any] = {
            "data": self.data,
            "rows": self.rows if self.rows is not None else len(self.data),
        }
        if self.query_time_seconds is not None:
            payload["query_time_seconds"] = self.query_time_seconds
        if self.metadata:
            payload["metadata"] = self.metadata
        for name in ("next_cursor", "page_size", "sort_by", "sort_order"):
            value = getattr(self, name)
            if value is not None:
                payload[name] = value
        return payload
320
+
321
+
322
def encode_page_cursor(payload: dict[str, Any]) -> str:
    """Encode a Page Cursor payload as URL-safe base64 JSON.

    The payload is dumped as compact JSON with sorted keys, base64-encoded
    with the URL-safe alphabet, and stripped of trailing ``=`` padding.

    Raises:
        InvalidCursorError: ``payload`` is not a dict.
    """
    if isinstance(payload, dict):
        compact = json.dumps(payload, sort_keys=True, separators=(",", ":"))
        token = base64.urlsafe_b64encode(compact.encode("utf-8"))
        return token.decode("ascii").rstrip("=")
    raise InvalidCursorError("cursor payload must be a JSON object", details={"field": "cursor"})
329
+
330
+
331
def decode_page_cursor(cursor: str) -> dict[str, Any]:
    """Decode an opaque Page Cursor string into its JSON object payload.

    Missing ``=`` padding is restored before decoding. The decoded JSON must
    be a non-empty object.

    Raises:
        InvalidCursorError: The cursor is not a non-empty string, cannot be
            decoded, or does not hold a non-empty JSON object.
    """
    if not (isinstance(cursor, str) and cursor):
        raise InvalidCursorError("cursor is invalid", details={"field": "cursor"})
    missing_padding = -len(cursor) % 4
    try:
        decoded = base64.urlsafe_b64decode((cursor + "=" * missing_padding).encode("ascii"))
        payload = json.loads(decoded.decode("utf-8"))
    except ValueError:
        # binascii.Error, json.JSONDecodeError and the Unicode errors are all
        # ValueError subclasses, so this single clause covers each of them.
        raise InvalidCursorError("cursor is invalid", details={"field": "cursor"}) from None
    if isinstance(payload, dict) and payload:
        return payload
    raise InvalidCursorError("cursor is invalid", details={"field": "cursor"})
345
+
346
+
347
+ def _validate_object(data: Any) -> None:
348
+ if not isinstance(data, dict):
349
+ raise InvalidRequestError("Request body must be a JSON object", details={})
350
+
351
+
352
+ def _reject_removed_limit(data: dict[str, Any]) -> None:
353
+ if "limit" in data:
354
+ raise InvalidRequestError(
355
+ "limit is no longer accepted; use page_size and cursor",
356
+ details={"field": "limit"},
357
+ )
358
+
359
+
360
+ def _reject_unknown_fields(data: dict[str, Any], allowed: set[str]) -> None:
361
+ unknown = sorted(set(data) - allowed)
362
+ if unknown:
363
+ raise InvalidRequestError(
364
+ f"Unknown request field: {unknown[0]}",
365
+ details={"field": unknown[0]},
366
+ )
367
+
368
+
369
def _parse_pagination(
    data: dict[str, Any], *, allowed_sort_by: set[str], default_sort_by: str
) -> dict[str, int | str | None]:
    """Extract and validate the pagination fields of a request dict.

    Defaults: ``page_size`` is ``_MAX_PAGE_SIZE``, ``cursor`` is ``None``,
    ``sort_by`` is ``default_sort_by`` and ``sort_order`` is ``"desc"``.
    A cursor is only checked for decodability; it is returned as given.

    Raises:
        InvalidRequestError: ``page_size``, ``sort_by`` or ``sort_order`` is
            invalid.
        InvalidCursorError: ``cursor`` is present but cannot be decoded.
    """
    page_size = data.get("page_size", _MAX_PAGE_SIZE)
    size_ok = (
        isinstance(page_size, int)
        and not isinstance(page_size, bool)
        and 1 <= page_size <= _MAX_PAGE_SIZE
    )
    if not size_ok:
        raise InvalidRequestError(
            f"page_size must be an integer between 1 and {_MAX_PAGE_SIZE}",
            details={"field": "page_size"},
        )

    cursor = data.get("cursor")
    if cursor is not None:
        # Called for validation only; the decoded payload is discarded.
        decode_page_cursor(cursor)

    sort_by = data.get("sort_by", default_sort_by)
    if not (isinstance(sort_by, str) and sort_by in allowed_sort_by):
        raise InvalidRequestError("sort_by has an unsupported value", details={"field": "sort_by"})

    sort_order = data.get("sort_order", "desc")
    if not (isinstance(sort_order, str) and sort_order in _SORT_ORDERS):
        raise InvalidRequestError(
            "sort_order has an unsupported value", details={"field": "sort_order"}
        )

    return dict(page_size=page_size, cursor=cursor, sort_by=sort_by, sort_order=sort_order)
395
+
396
+
397
+ def _pagination_to_dict(
398
+ page_size: int, cursor: str | None, sort_by: str, sort_order: str
399
+ ) -> dict[str, int | str]:
400
+ payload: dict[str, int | str] = {
401
+ "page_size": page_size,
402
+ "sort_by": sort_by,
403
+ "sort_order": sort_order,
404
+ }
405
+ if cursor is not None:
406
+ payload["cursor"] = cursor
407
+ return payload
408
+
409
+
410
+ def _require_fields(data: dict[str, Any], required: set[str], label: str) -> None:
411
+ missing = sorted(required - set(data))
412
+ if missing:
413
+ raise InvalidRequestError(
414
+ f"{label} missing required field: {missing[0]}",
415
+ details={"field": missing[0]},
416
+ )
417
+
418
+
419
def _validate_dataset_name(value: Any, field: str) -> str:
    """Return ``value`` if it is a string fully matching ``_DATASET_NAME_PATTERN``.

    Raises:
        InvalidRequestError: Otherwise; ``field`` names the offending field.
    """
    if isinstance(value, str) and _DATASET_NAME_PATTERN.fullmatch(value):
        return value
    raise InvalidRequestError(
        f"{field} must be a safe dataset name", details={"field": field}
    )
425
+
426
+
427
+ def _literal(value: Any, field: str, allowed: set[str]) -> Any:
428
+ if not isinstance(value, str) or value not in allowed:
429
+ raise InvalidRequestError(
430
+ f"{field} has an unsupported value", details={"field": field}
431
+ )
432
+ return value
433
+
434
+
435
+ def _nullable_literal(value: Any, field: str, allowed: set[str]) -> Any | None:
436
+ if value is None:
437
+ return None
438
+ return _literal(value, field, allowed)
439
+
440
+
441
+ def _nullable_str(value: Any, field: str) -> str | None:
442
+ if value is None:
443
+ return None
444
+ if not isinstance(value, str):
445
+ raise InvalidRequestError(f"{field} must be a string", details={"field": field})
446
+ return value
447
+
448
+
449
+ def _nullable_nonnegative_int(value: Any, field: str) -> int | None:
450
+ if value is None:
451
+ return None
452
+ if isinstance(value, bool) or not isinstance(value, int) or value < 0:
453
+ raise InvalidRequestError(
454
+ f"{field} must be a non-negative integer", details={"field": field}
455
+ )
456
+ return value
457
+
458
+
459
def _required_dataset_array(data: dict[str, Any], field: str) -> list[str]:
    """Return ``data[field]`` validated by ``_dataset_array``.

    Raises:
        InvalidRequestError: The key is absent, or its value is invalid.
    """
    if field in data:
        return _dataset_array(data[field], field)
    raise InvalidRequestError(f"Missing required field: {field}", details={"field": field})
463
+
464
+
465
def _dataset_array(value: Any, field: str) -> list[str]:
    """Return a new list holding the dataset names in ``value``.

    ``value`` must be a non-empty list whose items are all strings fully
    matching ``_DATASET_NAME_PATTERN``.

    Raises:
        InvalidRequestError: Otherwise; ``field`` names the offending field.
    """
    if not (isinstance(value, list) and value):
        raise InvalidRequestError(f"{field} must be a non-empty array", details={"field": field})
    for name in value:
        if isinstance(name, str) and _DATASET_NAME_PATTERN.fullmatch(name):
            continue
        raise InvalidRequestError(
            f"{field} must contain safe dataset names",
            details={"field": field},
        )
    return list(value)
477
+
478
+
479
+ def _optional_array(data: dict[str, Any], field: str, validate_item, error_type: type[APIContractError]) -> list[Any] | None:
480
+ if field not in data:
481
+ return None
482
+ value = data[field]
483
+ if value is None:
484
+ raise error_type(f"{field} must be an array", details={"field": field})
485
+ if not isinstance(value, list):
486
+ raise error_type(f"{field} must be an array", details={"field": field})
487
+ if not value:
488
+ raise error_type(f"{field} must be a non-empty array", details={"field": field})
489
+ return [validate_item(item, field) for item in value]
490
+
491
+
492
def _validate_country(value: Any, field: str) -> str:
    """Return ``value`` if it is a string fully matching ``_ISO3_PATTERN``.

    Raises:
        InvalidFilterError: Otherwise; ``field`` names the offending field.
    """
    if isinstance(value, str) and _ISO3_PATTERN.fullmatch(value):
        return value
    raise InvalidFilterError("countries must be ISO3 country codes", details={"field": field})
496
+
497
+
498
+ def _validate_year(value: Any, field: str) -> int:
499
+ if isinstance(value, bool) or not isinstance(value, int):
500
+ raise InvalidFilterError("years must be integers", details={"field": field})
501
+ return value