bls-api-client 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bls_api_client-1.0.0.dist-info/METADATA +236 -0
- bls_api_client-1.0.0.dist-info/RECORD +18 -0
- bls_api_client-1.0.0.dist-info/WHEEL +4 -0
- bls_api_client-1.0.0.dist-info/licenses/LICENSE +21 -0
- blsapi/__about__.py +3 -0
- blsapi/__init__.py +44 -0
- blsapi/_core.py +58 -0
- blsapi/_endpoints.py +33 -0
- blsapi/_retry.py +46 -0
- blsapi/aclient.py +165 -0
- blsapi/batching.py +59 -0
- blsapi/client.py +169 -0
- blsapi/config.py +53 -0
- blsapi/enums.py +23 -0
- blsapi/exceptions.py +31 -0
- blsapi/frames.py +138 -0
- blsapi/models.py +141 -0
- blsapi/py.typed +0 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bls-api-client
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A sync + async client for the BLS Public Data API
|
|
5
|
+
Project-URL: Homepage, https://github.com/covertcast/blsapi
|
|
6
|
+
Project-URL: Repository, https://github.com/covertcast/blsapi
|
|
7
|
+
Project-URL: Issues, https://github.com/covertcast/blsapi/issues
|
|
8
|
+
Author-email: covertcast <covertcast@proton.me>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: api-client,bls,bureau-of-labor-statistics,economics,labor-statistics,polars
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.11
|
|
26
|
+
Requires-Dist: anyio>=4.0
|
|
27
|
+
Requires-Dist: httpx2>=2.4.0
|
|
28
|
+
Requires-Dist: polars>=1.42.0
|
|
29
|
+
Requires-Dist: pydantic>=2.13.4
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# blsapi
|
|
33
|
+
|
|
34
|
+
[PyPI](https://pypi.org/project/bls-api-client/)
|
|
35
|
+
[Python versions](https://pypi.org/project/bls-api-client/)
|
|
36
|
+
[License: MIT](LICENSE)
|
|
37
|
+
|
|
38
|
+
A **sync + async** client inspired by `blsR` for the [U.S. Bureau of Labor Statistics
|
|
39
|
+
(BLS) Public Data API](https://www.bls.gov/developers/).
|
|
40
|
+
|
|
41
|
+
- **Sync and async** clients (`BLSClient`, `AsyncBLSClient`).
|
|
42
|
+
- **Polars output**: results come back as Polars DataFrames, which are easy to use and to convert to other formats.
|
|
43
|
+
- **Automatic batching** over the BLS per-request limits (series count and year span).
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
Requires Python 3.11+:
|
|
48
|
+
```bash
|
|
49
|
+
pip install bls-api-client
|
|
50
|
+
# or
|
|
51
|
+
uv add bls-api-client
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
import blsapi
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from blsapi import BLSClient
|
|
62
|
+
|
|
63
|
+
with BLSClient() as client:
|
|
64
|
+
df = client.get_series("LNS14000000", start_year=2023, end_year=2024)
|
|
65
|
+
|
|
66
|
+
print(df)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The result is a tidy/long DataFrame. Exporting elsewhere is
|
|
70
|
+
simple:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
df.write_parquet("data.parquet")
|
|
74
|
+
df.write_csv("data.csv")
|
|
75
|
+
df.to_pandas()
|
|
76
|
+
df.to_arrow()
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Async
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import anyio
|
|
83
|
+
from blsapi import AsyncBLSClient
|
|
84
|
+
|
|
85
|
+
async def main():
|
|
86
|
+
async with AsyncBLSClient() as client:
|
|
87
|
+
df = await client.get_series("LNS14000000", start_year=2023, end_year=2024)
|
|
88
|
+
print(df)
|
|
89
|
+
|
|
90
|
+
anyio.run(main)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
Everything has a sensible default, so `blsapi` works out of the box.
|
|
96
|
+
|
|
97
|
+
### API key
|
|
98
|
+
|
|
99
|
+
A registration key is optional but raises your rate limits substantially. The key is resolved with this precedence:
|
|
100
|
+
|
|
101
|
+
1. The explicit `api_key=` argument, if given.
|
|
102
|
+
2. Otherwise the `BLS_API_KEY` environment variable.
|
|
103
|
+
3. Otherwise no key (unauthenticated tier).
|
|
104
|
+
|
|
105
|
+
### Example:
|
|
106
|
+
```python
|
|
107
|
+
# Explicit (highest precedence)
|
|
108
|
+
client = BLSClient(api_key="your_32_char_key")
|
|
109
|
+
|
|
110
|
+
# Or rely on the environment
|
|
111
|
+
# export BLS_API_KEY=your_32_char_key (macOS/Linux)
|
|
112
|
+
# $env:BLS_API_KEY = "your_32_char_key" (PowerShell)
|
|
113
|
+
client = BLSClient()
|
|
114
|
+
|
|
115
|
+
client.has_key # returns True if a key is active
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Other options
|
|
119
|
+
|
|
120
|
+
All are keyword-only on both clients:
|
|
121
|
+
|
|
122
|
+
| Argument | Default | Purpose |
|
|
123
|
+
| ------------- | -------------------------------- | -------------------------------------------------------------- |
|
|
124
|
+
| `api_key` | `None` -> `BLS_API_KEY` | Registration key. |
|
|
125
|
+
| `base_url` | `https://api.bls.gov/publicAPI/v2/` | API base URL. |
|
|
126
|
+
| `timeout` | `5/30/10/5s` (connect/read/write/pool) | `httpx2.Timeout`. |
|
|
127
|
+
| `max_retries` | `3` | Application-level retries for transient failures. |
|
|
128
|
+
| `auto_batch` | `True` | Transparently split over the tier limits. |
|
|
129
|
+
| `user_agent` | `blsapi/<version>` | Sent as `User-Agent`. |
|
|
130
|
+
| `client` | `None` | Inject your own `httpx2.Client`/`AsyncClient`.|
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
import httpx2
|
|
134
|
+
from blsapi import BLSClient
|
|
135
|
+
|
|
136
|
+
client = BLSClient(
|
|
137
|
+
api_key="…",
|
|
138
|
+
max_retries=5,
|
|
139
|
+
timeout=httpx2.Timeout(connect=5.0, read=60.0, write=10.0, pool=5.0),
|
|
140
|
+
user_agent="my-app/1.0 (you@example.com)",
|
|
141
|
+
)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Usage
|
|
145
|
+
|
|
146
|
+
### Multiple series and named aliases
|
|
147
|
+
|
|
148
|
+
Pass a list of ids, or a `{label: id}` mapping to relabel the output's `series_id` column:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
df = client.get_series(
|
|
152
|
+
{"unemployment": "LNS14000000", "cpi": "CUUR0000SA0"},
|
|
153
|
+
start_year=2020,
|
|
154
|
+
end_year=2024,
|
|
155
|
+
)
|
|
156
|
+
df["series_id"].unique().to_list() # -> ["cpi", "unemployment"]
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Wide format
|
|
160
|
+
|
|
161
|
+
Reshape the long frame to one value column per series, indexed by date:
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
from blsapi import pivot_wide
|
|
165
|
+
|
|
166
|
+
wide = pivot_wide(df) # columns: date, unemployment, cpi
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
(Annual-average rows have no real date and are dropped by `pivot_wide`.)
|
|
170
|
+
|
|
171
|
+
### Calculations and catalog
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
df = client.get_series(
|
|
175
|
+
"LNS14000000", start_year=2023, end_year=2024, calculations=True
|
|
176
|
+
)
|
|
177
|
+
# adds net_change_{1,3,6,12}m and pct_change_{1,3,6,12}m columns
|
|
178
|
+
|
|
179
|
+
resp = client.get_series_raw("LNS14000000", catalog=True) # -> BLSResponse with catalog metadata
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Surveys and popular series
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
client.list_surveys() # -> DataFrame of all surveys
|
|
186
|
+
client.get_survey("CU") # -> dict of metadata for one survey
|
|
187
|
+
client.get_popular() # -> list of popular series ids
|
|
188
|
+
client.get_popular("LN") # -> popular ids within a survey
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Quota planning
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
client.query_cost(["LNS14000000", "CUUR0000SA0"], 2000, 2024) # -> number of API calls used
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Error handling
|
|
198
|
+
|
|
199
|
+
All errors derive from `BLSError`, so you can catch broadly or narrowly:
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from blsapi import BLSError, BLSValidationError, BLSAPIError, BLSHTTPError
|
|
203
|
+
|
|
204
|
+
try:
|
|
205
|
+
df = client.get_series("BAD_ID", start_year=2024, end_year=2023)
|
|
206
|
+
except BLSValidationError:
|
|
207
|
+
... # bad input caught locally, before any network call
|
|
208
|
+
except BLSAPIError as e:
|
|
209
|
+
... # BLS returned a non-success status; inspect e.status and e.messages
|
|
210
|
+
except BLSHTTPError:
|
|
211
|
+
... # transport/HTTP failure that survived the retry loop
|
|
212
|
+
except BLSError:
|
|
213
|
+
... # anything else from this library
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## Tiers & limits
|
|
217
|
+
|
|
218
|
+
| | Series per request | Years per request | Queries per day |
|
|
219
|
+
| --- | --- | --- | --- |
|
|
220
|
+
| **No key** | 25 | 10 | 25 |
|
|
221
|
+
| **With key** | 50 | 20 | 500 |
|
|
222
|
+
|
|
223
|
+
With `auto_batch=True` (the default), requests that exceed the per-request limits are split
|
|
224
|
+
into multiple calls automatically and stitched back together. Register for a free key at
|
|
225
|
+
<https://data.bls.gov/registrationEngine/>.
|
|
226
|
+
|
|
227
|
+
## Development
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
uv sync # install dependencies
|
|
231
|
+
uv build # build the sdist + wheel
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## License
|
|
235
|
+
|
|
236
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
blsapi/__about__.py,sha256=l3nlRfP79jMcuBVWEOoKZ-BJRYGwRf6DSq2yGyTyekQ,73
|
|
2
|
+
blsapi/__init__.py,sha256=zoWbHEj9fwLjIZUBKdn0UYpfcEsP-6gVCBWYSmAzYgA,1195
|
|
3
|
+
blsapi/_core.py,sha256=2aiYuNNo1sX2HIAtL5iQ77_HRIkGn6cWce5ke_Mys7E,2203
|
|
4
|
+
blsapi/_endpoints.py,sha256=Fi32WkLP_jCb4vNiV5JQKaver-Q5-0bCD5pvQMC1C1Q,803
|
|
5
|
+
blsapi/_retry.py,sha256=9siSaL7mBowEUojUxKTSqrJPrlMRI5qIVGh6c7i894U,1289
|
|
6
|
+
blsapi/aclient.py,sha256=ZCXMKwIv0ST7uUmm_Jvz9ZO-KCnF-zzzl1vIlwNaz9Q,6377
|
|
7
|
+
blsapi/batching.py,sha256=_NLObbwvssEq4u4crjY7f3AxsdMsaGiAGNFgbrIEJBE,1904
|
|
8
|
+
blsapi/client.py,sha256=8FYslVbGjygFIYdTtA676z881-DLRDCCWaFG6yJDfho,6609
|
|
9
|
+
blsapi/config.py,sha256=BORa_W78VlaW83P6-M2-uVs_kCLdYY4_2cgLW9FZuJo,1741
|
|
10
|
+
blsapi/enums.py,sha256=2y6zeHl_ldUAF7r2sNyLBo0NutjLRaQ0fEM74zBfqs4,671
|
|
11
|
+
blsapi/exceptions.py,sha256=kCeZ1I9N99_QVXu2mmvWOpP_Q3Fw0Fm05wyC2PKNbXM,775
|
|
12
|
+
blsapi/frames.py,sha256=U4q5dXEh5ej7vVBMRgc7HWPNoSV-zm2jIlbc7gviH8o,5074
|
|
13
|
+
blsapi/models.py,sha256=vHQy9RcOYBGWG3YGtlk554n6-aR5q_joxegzlRpmG2Y,4774
|
|
14
|
+
blsapi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
bls_api_client-1.0.0.dist-info/METADATA,sha256=vWC6jXGBn0xrx7Dv7V9IRZqeeZ3Hansd4WiOynMJs7I,7135
|
|
16
|
+
bls_api_client-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
17
|
+
bls_api_client-1.0.0.dist-info/licenses/LICENSE,sha256=7Ml9vqERSFrXdwfgQj99rsL8Nwz9jskhV3xiiCrBSB8,1067
|
|
18
|
+
bls_api_client-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 covertcast
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
blsapi/__about__.py
ADDED
blsapi/__init__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""blsapi - a sync + async client for the BLS Public Data API.
|
|
2
|
+
|
|
3
|
+
Quick start:
|
|
4
|
+
|
|
5
|
+
from blsapi import BLSClient
|
|
6
|
+
with BLSClient() as client: # reads BLS_API_KEY from env if set
|
|
7
|
+
df = client.get_series("LNS14000000", start_year=2023, end_year=2024)
|
|
8
|
+
|
|
9
|
+
# async
|
|
10
|
+
from blsapi import AsyncBLSClient
|
|
11
|
+
async with AsyncBLSClient() as client:
|
|
12
|
+
df = await client.get_series("LNS14000000", start_year=2023, end_year=2024)
|
|
13
|
+
|
|
14
|
+
The result is a Polars DataFrame. Exporting it to other formats is simple:
|
|
15
|
+
df.write_parquet(...), df.write_csv(...), df.to_pandas(), df.to_arrow()
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .__about__ import __version__
|
|
19
|
+
from .aclient import AsyncBLSClient
|
|
20
|
+
from .client import BLSClient
|
|
21
|
+
from .enums import PeriodType
|
|
22
|
+
from .exceptions import (
|
|
23
|
+
BLSAPIError,
|
|
24
|
+
BLSError,
|
|
25
|
+
BLSHTTPError,
|
|
26
|
+
BLSValidationError,
|
|
27
|
+
)
|
|
28
|
+
from .frames import period_to_date, pivot_wide, series_to_frame
|
|
29
|
+
from .models import BLSResponse
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"AsyncBLSClient",
|
|
33
|
+
"BLSClient",
|
|
34
|
+
"BLSError",
|
|
35
|
+
"BLSValidationError",
|
|
36
|
+
"BLSAPIError",
|
|
37
|
+
"BLSHTTPError",
|
|
38
|
+
"BLSResponse",
|
|
39
|
+
"PeriodType",
|
|
40
|
+
"series_to_frame",
|
|
41
|
+
"pivot_wide",
|
|
42
|
+
"period_to_date",
|
|
43
|
+
"__version__",
|
|
44
|
+
]
|
blsapi/_core.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Utilities for both the sync and async clients."""
|
|
2
|
+
|
|
3
|
+
from .models import BLSResponse, SeriesRequest, SeriesResult
|
|
4
|
+
from ._endpoints import RequestSpec, data_url
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from . import config
|
|
7
|
+
from .batching import chunk_series, year_windows
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_data_request(req: SeriesRequest, api_key: str | None, base_url: str) -> RequestSpec:
    """Describe the POST call that fetches the series named in ``req``.

    The target URL comes from ``data_url(base_url)`` and the JSON body from
    ``req.to_payload(api_key)``. Nothing is sent here; the returned
    RequestSpec is only a description of the call.
    """
    endpoint = data_url(base_url)
    body = req.to_payload(api_key)
    return RequestSpec(method="POST", url=endpoint, json=body)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def parse_data_response(payload: dict) -> BLSResponse:
    """Validate a decoded JSON payload into a BLSResponse.

    Delegates to ``BLSResponse.model_validate``; whatever that call raises
    for a payload it rejects propagates unchanged to the caller.
    """
    validated = BLSResponse.model_validate(payload)
    return validated
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def plan_data_requests(
    req: SeriesRequest, api_key: str | None, base_url: str, auto_batch: bool
) -> list[RequestSpec]:
    """Expand one logical request into the list of API calls that serve it.

    With ``auto_batch`` off the request is passed through as a single call.
    Otherwise the series ids are cut into chunks of at most the series limit
    and, when both years are set, the year range into windows of at most the
    year limit (limits come from ``config.limits_for(api_key)``). One
    RequestSpec is produced per (series chunk, year window) pair.
    """
    if not auto_batch:
        return [build_data_request(req, api_key, base_url)]

    series_limit, year_limit, _ = config.limits_for(api_key)

    both_years_given = req.start_year is not None and req.end_year is not None
    if both_years_given:
        windows = list(year_windows(req.start_year, req.end_year, year_limit))
    else:
        # Without a complete year range there is nothing to split: keep the
        # request's own (possibly None) bounds as the only window.
        windows = [(req.start_year, req.end_year)]

    specs: list[RequestSpec] = []
    for ids in chunk_series(req.series_ids, series_limit):
        for lo, hi in windows:
            part = req.model_copy(
                update={"series_ids": ids, "start_year": lo, "end_year": hi}
            )
            specs.append(build_data_request(part, api_key, base_url))
    return specs
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def merge_series(series_lists: Iterable[list[SeriesResult]]) -> list[SeriesResult]:
    """Fold the series of several responses into one entry per series id.

    The first SeriesResult seen for an id is kept (and returned) as the
    accumulator; the ``data`` of every later result with the same id is
    appended to it in place. Output order follows first appearance.
    """
    by_id: dict[str, SeriesResult] = {}
    for batch in series_lists:
        for result in batch:
            kept = by_id.get(result.series_id)
            if kept is None:
                # First time this id shows up: it becomes the accumulator.
                by_id[result.series_id] = result
            else:
                # Same series from another request (e.g. another year window).
                kept.data.extend(result.data)
    return list(by_id.values())
|
blsapi/_endpoints.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass(slots=True)
|
|
6
|
+
class RequestSpec:
|
|
7
|
+
"""Everything _send() needs to make a request"""
|
|
8
|
+
|
|
9
|
+
method: str
|
|
10
|
+
url: str
|
|
11
|
+
json: dict[str, Any] | None = None
|
|
12
|
+
params: dict[str, Any] | None = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# All builders take the client's base_url so the same code works against v1 or v2.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def data_url(base_url: str) -> str:
    """Return the time-series data endpoint under ``base_url``.

    A non-empty ``base_url`` that lacks a trailing slash gets one, so
    ``".../v2"`` and ``".../v2/"`` yield the same URL instead of the former
    silently producing ``".../v2timeseries/data/"``.
    """
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    return f"{base_url}timeseries/data/"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def series_url(base_url: str, series_id: str) -> str:
    """Return the data endpoint for one series under ``base_url``.

    A non-empty ``base_url`` that lacks a trailing slash gets one, so the
    path segment is never glued onto the last component of the base URL.
    ``series_id`` is inserted as given (no URL-encoding is applied).
    """
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    return f"{base_url}timeseries/data/{series_id}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def popular_url(base_url: str) -> str:
    """Return the popular-series endpoint under ``base_url``.

    A non-empty ``base_url`` that lacks a trailing slash gets one, so
    ``".../v2"`` does not turn into ``".../v2timeseries/popular"``.
    """
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    return f"{base_url}timeseries/popular"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def surveys_url(base_url: str, survey_abbr: str | None = None) -> str:
|
|
31
|
+
if survey_abbr is None:
|
|
32
|
+
return f"{base_url}surveys"
|
|
33
|
+
return f"{base_url}surveys/{survey_abbr}"
|
blsapi/_retry.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
import httpx2
|
|
4
|
+
|
|
5
|
+
from .config import BACKOFF_BASE, BACKOFF_MAX, RETRYABLE_STATUS
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Transport-level exception classes that is_retryable() always treats as
# worth another attempt (connection failures, the four timeout kinds, and
# protocol errors from the remote side).
_RETRYABLE_TRANSPORT_ERRORS: tuple[type[Exception], ...] = (
    httpx2.ConnectError,
    httpx2.ConnectTimeout,
    httpx2.ReadTimeout,
    httpx2.WriteTimeout,
    httpx2.PoolTimeout,
    httpx2.RemoteProtocolError,
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_retryable(exc: Exception) -> bool:
    """Decide whether a failed request is worth another attempt.

    Two cases qualify: any exception listed in _RETRYABLE_TRANSPORT_ERRORS,
    and an HTTP status error whose status code is in RETRYABLE_STATUS.
    Everything else is treated as permanent.
    """
    transient_transport = isinstance(exc, _RETRYABLE_TRANSPORT_ERRORS)
    if not transient_transport and isinstance(exc, httpx2.HTTPStatusError):
        return exc.response.status_code in RETRYABLE_STATUS
    return transient_transport
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def retry_after(exc: Exception) -> float | None:
|
|
28
|
+
"""Honor a server-sent `Retry-After` header if present.
|
|
29
|
+
Returns None when there's no usable header, so the caller falls back to backoff.
|
|
30
|
+
"""
|
|
31
|
+
response = getattr(exc, "response", None)
|
|
32
|
+
if response is None:
|
|
33
|
+
return None
|
|
34
|
+
value = response.headers.get("Retry-After")
|
|
35
|
+
if not value:
|
|
36
|
+
return None
|
|
37
|
+
try:
|
|
38
|
+
return float(value)
|
|
39
|
+
except ValueError:
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def backoff_delay(attempt: int) -> float:
    """Pick a randomised delay for retry number ``attempt``.

    The upper bound is BACKOFF_BASE doubled once per attempt, but never more
    than BACKOFF_MAX; the returned delay is drawn uniformly between 0 and
    that bound.
    """
    exponential = BACKOFF_BASE * (2**attempt)
    upper = exponential if exponential < BACKOFF_MAX else BACKOFF_MAX
    return random.uniform(0.0, upper)
|
blsapi/aclient.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Asynchronous BLS client."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping, Sequence
|
|
4
|
+
|
|
5
|
+
import anyio
|
|
6
|
+
import httpx2
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from . import batching, config
|
|
10
|
+
from ._core import (
|
|
11
|
+
build_data_request,
|
|
12
|
+
merge_series,
|
|
13
|
+
parse_data_response,
|
|
14
|
+
plan_data_requests,
|
|
15
|
+
)
|
|
16
|
+
from ._endpoints import RequestSpec, popular_url, surveys_url
|
|
17
|
+
from ._retry import backoff_delay, is_retryable, retry_after
|
|
18
|
+
from .exceptions import BLSHTTPError
|
|
19
|
+
from .frames import series_to_frame
|
|
20
|
+
from .models import BLSResponse, SeriesRequest, normalize_series_ids
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class AsyncBLSClient:
    """Asynchronous client for the BLS Public Data API.

    Use it as an async context manager, or call ``aclose()`` when finished,
    so that an HTTP client created by this object is closed.
    """

    def __init__(
        self,
        api_key: str | None = None,
        *,
        base_url: str = config.DEFAULT_BASE_URL,
        timeout: float | httpx2.Timeout = config.DEFAULT_TIMEOUT,
        max_retries: int = config.DEFAULT_MAX_RETRIES,
        auto_batch: bool = True,
        user_agent: str = config.DEFAULT_USER_AGENT,
        client: httpx2.AsyncClient | None = None,
    ) -> None:
        """Configure the client.

        Args:
            api_key: Registration key; resolved through
                ``config.resolve_api_key``.
            base_url: Base URL the endpoint paths are appended to.
            timeout: Timeout for the internally created HTTP client. Ignored
                when ``client`` is supplied.
            max_retries: Number of retries ``_send()`` performs after the
                first attempt. Negative values are treated as 0.
            auto_batch: Whether ``get_series()`` splits requests over the
                tier limits.
            user_agent: ``User-Agent`` header for the internally created
                HTTP client. Ignored when ``client`` is supplied.
            client: Caller-owned HTTP client to use instead of creating one;
                it is never closed by this object.
        """
        self._api_key = config.resolve_api_key(api_key)
        self._base_url = base_url
        # A negative count would make _send() skip its request loop and fail
        # without ever contacting the API, so clamp it to "no retries".
        self._max_retries = max(0, max_retries)
        self._auto_batch = auto_batch

        if client is not None:
            # Caller-supplied client: use it as-is and leave closing to the caller.
            self._http = client
            self._owns_client = False
        else:
            # The transport gets its own retries=2 setting, separate from the
            # max_retries loop in _send().
            transport = httpx2.AsyncHTTPTransport(retries=2)
            self._http = httpx2.AsyncClient(
                timeout=timeout, transport=transport, headers={"User-Agent": user_agent}
            )
            self._owns_client = True

    @property
    def has_key(self) -> bool:
        """True when a registration key was resolved."""
        return self._api_key is not None

    async def _send(self, spec: RequestSpec) -> dict:
        """Send ``spec`` with retry/backoff and return the decoded JSON body.

        Retryable failures (see ``is_retryable``) are retried up to
        ``max_retries`` times, sleeping for the server's ``Retry-After`` value
        when one is usable and for ``backoff_delay(attempt)`` otherwise.

        Raises:
            BLSHTTPError: on a non-retryable failure, when the retries are
                exhausted, or when the response body is not valid JSON.
        """
        last_exc: Exception | None = None
        for attempt in range(self._max_retries + 1):
            try:
                response = await self._http.request(
                    spec.method, spec.url, json=spec.json, params=spec.params
                )
                response.raise_for_status()
                return response.json()
            except (httpx2.RequestError, httpx2.HTTPStatusError) as exc:
                last_exc = exc
                if attempt == self._max_retries or not is_retryable(exc):
                    raise BLSHTTPError(str(exc)) from exc
                await anyio.sleep(retry_after(exc) or backoff_delay(attempt))
            except ValueError as exc:
                raise BLSHTTPError(f"BLS returned a non-JSON response: {exc}") from exc
        # Safety net: every loop iteration above either returns or raises.
        raise BLSHTTPError(str(last_exc)) from last_exc

    async def get_series(
        self,
        series_ids: str | Sequence[str] | Mapping[str, str],
        *,
        start_year: int | None = None,
        end_year: int | None = None,
        catalog: bool = False,
        calculations: bool = False,
        annual_average: bool = False,
        aspects: bool = False,
        latest: bool = False,
        parse_values: bool = True,
    ) -> pl.DataFrame:
        """Fetch one or more series as a tidy Polars DataFrame.

        ``series_ids`` is normalised by ``normalize_series_ids``; the aliases
        it returns are handed to ``series_to_frame``. The year bounds and the
        boolean flags are forwarded to ``SeriesRequest``. The planned requests
        are sent one after another and their series merged by series id.
        """
        bls_ids, aliases = normalize_series_ids(series_ids)
        req = SeriesRequest(
            series_ids=bls_ids,
            start_year=start_year,
            end_year=end_year,
            catalog=catalog,
            calculations=calculations,
            annual_average=annual_average,
            aspects=aspects,
            latest=latest,
        )
        specs = plan_data_requests(req, self._api_key, self._base_url, self._auto_batch)
        series = merge_series(
            [parse_data_response(await self._send(spec)).series for spec in specs]
        )
        return series_to_frame(series, parse_values=parse_values, aliases=aliases)

    async def get_series_raw(
        self,
        series_ids: str | Sequence[str] | Mapping[str, str],
        **flags: object,
    ) -> BLSResponse:
        """Fetch series and return the validated BLSResponse (no frame).

        ``flags`` are passed to ``SeriesRequest`` unchanged. Exactly one
        request is sent; no batching is applied here.
        """
        bls_ids, _ = normalize_series_ids(series_ids)
        req = SeriesRequest(series_ids=bls_ids, **flags)
        spec = build_data_request(req, self._api_key, self._base_url)
        return parse_data_response(await self._send(spec))

    async def get_latest(self, series_id: str, **flags: object) -> pl.DataFrame:
        """Most-recent data point for a single series.

        Shorthand for ``get_series(series_id, latest=True, **flags)``.
        """
        return await self.get_series(series_id, latest=True, **flags)

    async def list_surveys(self) -> pl.DataFrame:
        """All BLS surveys as a small DataFrame."""
        params = {"registrationkey": self._api_key} if self._api_key else None
        spec = RequestSpec("GET", surveys_url(self._base_url), params=params)
        payload = await self._send(spec)
        # NOTE(review): a payload without these keys raises KeyError here
        # rather than a BLSError - confirm that is intended.
        return pl.DataFrame(payload["Results"]["survey"])

    async def get_survey(self, survey_abbr: str) -> dict:
        """Metadata for a single survey (the payload's ``Results`` entry)."""
        params = {"registrationkey": self._api_key} if self._api_key else None
        spec = RequestSpec("GET", surveys_url(self._base_url, survey_abbr), params=params)
        payload = await self._send(spec)
        return payload["Results"]

    async def get_popular(self, survey: str | None = None) -> list[str]:
        """The most popular series ids, optionally scoped to one survey."""
        params: dict[str, str] = {}
        if survey is not None:
            params["survey"] = survey
        if self._api_key:
            params["registrationkey"] = self._api_key
        # An empty dict is sent as "no query parameters" (None).
        spec = RequestSpec("GET", popular_url(self._base_url), params=params or None)
        payload = await self._send(spec)
        return [s["seriesID"] for s in payload["Results"]["series"]]

    def query_cost(
        self,
        series_ids: str | Sequence[str] | Mapping[str, str],
        start_year: int | None = None,
        end_year: int | None = None,
    ) -> int:
        """Calculate how many API calls ``get_series()`` would consume.

        With ``auto_batch`` disabled ``get_series()`` always issues exactly
        one request, so the answer is 1. Otherwise the count is estimated
        from the number of series, the year range and this client's tier
        limits.
        """
        # Normalise first so the input is processed the same way whether or
        # not batching is enabled.
        bls_ids, _ = normalize_series_ids(series_ids)
        if not self._auto_batch:
            return 1
        series_limit, year_limit, _ = config.limits_for(self._api_key)
        return batching.estimate_query_count(
            len(bls_ids), start_year, end_year, series_limit, year_limit
        )

    async def aclose(self) -> None:
        """Close the HTTP client, but only if this object created it."""
        if self._owns_client:
            await self._http.aclose()

    async def __aenter__(self) -> "AsyncBLSClient":
        """Enter the async context and return this client."""
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Leave the async context, closing the client via ``aclose()``."""
        await self.aclose()
|
blsapi/batching.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Batching helpers for working around BLS per-request limits.
|
|
2
|
+
|
|
3
|
+
BLS caps each request at N series and an M-year span (25/10 without a key, 50/20 with).
|
|
4
|
+
To fetch more, the client splits into chunks, issues several requests, and stitches the
|
|
5
|
+
results back together.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import math
|
|
9
|
+
from collections.abc import Iterator, Sequence
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def chunk_series(ids: Sequence[str], size: int) -> Iterator[list[str]]:
    """Split ``ids`` into consecutive lists of at most ``size`` items.

        list(chunk_series(["a", "b", "c"], 2))
        [['a', 'b'], ['c']]

    Raises ValueError (on first iteration) when ``size`` is below 1.
    """
    if size < 1:
        raise ValueError("size must be >= 1")
    total = len(ids)
    offset = 0
    while offset < total:
        yield list(ids[offset : offset + size])
        offset += size
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def year_windows(start: int, end: int, span: int) -> Iterator[tuple[int, int]]:
    """Cover ``start``..``end`` with inclusive (lo, hi) windows of at most ``span`` years.

        list(year_windows(2000, 2024, 10))
        [(2000, 2009), (2010, 2019), (2020, 2024)]

    Raises ValueError (on first iteration) when ``span`` is below 1 or when
    ``start`` is after ``end``.
    """
    if span < 1:
        raise ValueError("span must be >= 1")
    if start > end:
        raise ValueError("start must be <= end")
    for lo in range(start, end + 1, span):
        # The final window is clipped so it never runs past `end`.
        yield (lo, min(lo + span - 1, end))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def estimate_query_count(
|
|
42
|
+
n_series: int,
|
|
43
|
+
start_year: int | None,
|
|
44
|
+
end_year: int | None,
|
|
45
|
+
series_limit: int,
|
|
46
|
+
year_limit: int,
|
|
47
|
+
) -> int:
|
|
48
|
+
"""How many API calls a get_series() will consume after batching.
|
|
49
|
+
|
|
50
|
+
Useful for staying under the daily quota; exposed via BLSClient.query_cost().
|
|
51
|
+
A year range only multiplies the count when BOTH years are given (otherwise BLS
|
|
52
|
+
returns its default window in a single call).
|
|
53
|
+
"""
|
|
54
|
+
series_chunks = max(1, math.ceil(n_series / series_limit))
|
|
55
|
+
if start_year is None or end_year is None:
|
|
56
|
+
year_chunks = 1
|
|
57
|
+
else:
|
|
58
|
+
year_chunks = max(1, math.ceil((end_year - start_year + 1) / year_limit))
|
|
59
|
+
return series_chunks * year_chunks
|
blsapi/client.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Synchronous BLS client."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from collections.abc import Mapping, Sequence
|
|
5
|
+
|
|
6
|
+
import httpx2
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from . import batching, config
|
|
10
|
+
from ._core import (
|
|
11
|
+
build_data_request,
|
|
12
|
+
merge_series,
|
|
13
|
+
parse_data_response,
|
|
14
|
+
plan_data_requests,
|
|
15
|
+
)
|
|
16
|
+
from ._endpoints import RequestSpec, popular_url, surveys_url
|
|
17
|
+
from ._retry import backoff_delay, is_retryable, retry_after
|
|
18
|
+
from .exceptions import BLSHTTPError
|
|
19
|
+
from .frames import series_to_frame
|
|
20
|
+
from .models import BLSResponse, SeriesRequest, normalize_series_ids
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BLSClient:
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
api_key: str | None = None,
|
|
27
|
+
*,
|
|
28
|
+
base_url: str = config.DEFAULT_BASE_URL,
|
|
29
|
+
timeout: float | httpx2.Timeout = config.DEFAULT_TIMEOUT,
|
|
30
|
+
max_retries: int = config.DEFAULT_MAX_RETRIES,
|
|
31
|
+
auto_batch: bool = True,
|
|
32
|
+
user_agent: str = config.DEFAULT_USER_AGENT,
|
|
33
|
+
client: httpx2.Client | None = None,
|
|
34
|
+
) -> None:
|
|
35
|
+
self._api_key = config.resolve_api_key(api_key)
|
|
36
|
+
self._base_url = base_url
|
|
37
|
+
self._max_retries = max_retries
|
|
38
|
+
self._auto_batch = auto_batch
|
|
39
|
+
|
|
40
|
+
if client is not None:
|
|
41
|
+
# Caller-supplied client.
|
|
42
|
+
self._http = client
|
|
43
|
+
self._owns_client = False
|
|
44
|
+
else:
|
|
45
|
+
transport = httpx2.HTTPTransport(retries=2)
|
|
46
|
+
self._http = httpx2.Client(
|
|
47
|
+
timeout=timeout, transport=transport, headers={"User-Agent": user_agent}
|
|
48
|
+
)
|
|
49
|
+
self._owns_client = True
|
|
50
|
+
|
|
51
|
+
@property
|
|
52
|
+
def has_key(self) -> bool:
|
|
53
|
+
return self._api_key is not None
|
|
54
|
+
|
|
55
|
+
def _send(self, spec: RequestSpec) -> dict:
|
|
56
|
+
"""Send a request with retry/backoff, returning parsed JSON."""
|
|
57
|
+
last_exc: Exception | None = None
|
|
58
|
+
for attempt in range(self._max_retries + 1):
|
|
59
|
+
try:
|
|
60
|
+
response = self._http.request(
|
|
61
|
+
spec.method, spec.url, json=spec.json, params=spec.params
|
|
62
|
+
)
|
|
63
|
+
response.raise_for_status()
|
|
64
|
+
return response.json()
|
|
65
|
+
except (httpx2.RequestError, httpx2.HTTPStatusError) as exc:
|
|
66
|
+
last_exc = exc
|
|
67
|
+
if attempt == self._max_retries or not is_retryable(exc):
|
|
68
|
+
raise BLSHTTPError(str(exc)) from exc
|
|
69
|
+
time.sleep(retry_after(exc) or backoff_delay(attempt))
|
|
70
|
+
except ValueError as exc:
|
|
71
|
+
raise BLSHTTPError(f"BLS returned a non-JSON response: {exc}") from exc
|
|
72
|
+
raise BLSHTTPError(str(last_exc)) from last_exc
|
|
73
|
+
|
|
74
|
+
def get_series(
    self,
    series_ids: str | Sequence[str] | Mapping[str, str],
    *,
    start_year: int | None = None,
    end_year: int | None = None,
    catalog: bool = False,
    calculations: bool = False,
    annual_average: bool = False,
    aspects: bool = False,
    latest: bool = False,
    parse_values: bool = True,
) -> pl.DataFrame:
    """Fetch one or more series and return them as a tidy Polars DataFrame.

    ``series_ids`` can be a single id, a sequence of ids, or a {label: id}
    mapping; with a mapping, the labels replace the ids in the output's
    ``series_id`` column. With ``auto_batch`` enabled, requests exceeding the
    tier limits are split across several API calls and merged back together
    by series id.
    """
    bls_ids, aliases = normalize_series_ids(series_ids)
    request = SeriesRequest(
        series_ids=bls_ids,
        start_year=start_year,
        end_year=end_year,
        catalog=catalog,
        calculations=calculations,
        annual_average=annual_average,
        aspects=aspects,
        latest=latest,
    )
    planned = plan_data_requests(
        request, self._api_key, self._base_url, self._auto_batch
    )
    # Kept as a generator so each request is sent only as merge_series consumes it.
    per_call_series = (
        parse_data_response(self._send(item)).series for item in planned
    )
    merged = merge_series(per_call_series)
    return series_to_frame(merged, parse_values=parse_values, aliases=aliases)
|
|
108
|
+
|
|
109
|
+
def get_series_raw(
    self,
    series_ids: str | Sequence[str] | Mapping[str, str],
    **flags: object,
) -> BLSResponse:
    """Return the validated BLSResponse for the given series, without building a frame.

    ``flags`` are forwarded as keyword arguments to ``SeriesRequest``. A single
    request is built with ``build_data_request`` and sent.
    """
    bls_ids, _unused_aliases = normalize_series_ids(series_ids)
    request = SeriesRequest(series_ids=bls_ids, **flags)
    payload = self._send(build_data_request(request, self._api_key, self._base_url))
    return parse_data_response(payload)
|
|
119
|
+
|
|
120
|
+
def get_latest(self, series_id: str, **flags: object) -> pl.DataFrame:
    """Return the most recent data point for one series.

    Delegates to ``get_series`` with ``latest=True``; any extra ``flags`` are
    passed through unchanged.
    """
    frame = self.get_series(series_id, latest=True, **flags)
    return frame
|
|
123
|
+
|
|
124
|
+
def list_surveys(self) -> pl.DataFrame:
    """Fetch the survey listing and return its ``Results.survey`` entries as a DataFrame.

    The registration key is sent as a ``registrationkey`` query parameter when
    one is configured.
    """
    key = self._api_key
    query = {"registrationkey": key} if key else None
    response = self._send(RequestSpec("GET", surveys_url(self._base_url), params=query))
    return pl.DataFrame(response["Results"]["survey"])
|
|
130
|
+
|
|
131
|
+
def get_survey(self, survey_abbr: str) -> dict:
    """Fetch one survey by abbreviation and return the ``Results`` part of the payload.

    The registration key is sent as a ``registrationkey`` query parameter when
    one is configured.
    """
    key = self._api_key
    query = {"registrationkey": key} if key else None
    url = surveys_url(self._base_url, survey_abbr)
    response = self._send(RequestSpec("GET", url, params=query))
    return response["Results"]
|
|
136
|
+
|
|
137
|
+
def get_popular(self, survey: str | None = None) -> list[str]:
    """Return the ids of the most popular series, optionally limited to one survey.

    Query parameters are only sent when at least one of ``survey`` or the
    registration key is present.
    """
    query: dict[str, str] = {} if survey is None else {"survey": survey}
    if self._api_key:
        query["registrationkey"] = self._api_key
    spec = RequestSpec(
        "GET", popular_url(self._base_url), params=query if query else None
    )
    entries = self._send(spec)["Results"]["series"]
    return [entry["seriesID"] for entry in entries]
|
|
147
|
+
|
|
148
|
+
def query_cost(
    self,
    series_ids: str | Sequence[str] | Mapping[str, str],
    start_year: int | None = None,
    end_year: int | None = None,
) -> int:
    """Estimate how many API calls an equivalent ``get_series()`` would consume.

    The estimate comes from ``batching.estimate_query_count`` using the
    per-request series and year limits of the active tier.
    """
    bls_ids, _unused_aliases = normalize_series_ids(series_ids)
    max_series, max_years, _daily = config.limits_for(self._api_key)
    id_count = len(bls_ids)
    return batching.estimate_query_count(
        id_count, start_year, end_year, max_series, max_years
    )
|
|
160
|
+
|
|
161
|
+
def close(self) -> None:
    """Close the underlying HTTP client, but only when this instance created it.

    A caller-supplied client (``_owns_client`` is False) is left open.
    """
    if not self._owns_client:
        return
    self._http.close()
|
|
164
|
+
|
|
165
|
+
def __enter__(self) -> "BLSClient":
    """Enter the ``with`` block; returns this client itself."""
    return self
|
|
167
|
+
|
|
168
|
+
def __exit__(self, *exc_info: object) -> None:
    """Leave the ``with`` block by calling ``close()``.

    The exception details in ``exc_info`` are not inspected, and the method
    returns None, so any in-flight exception propagates.
    """
    self.close()
|
blsapi/config.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Default configuration constants and helpers for the BLS client.
|
|
2
|
+
Everything here is a sensible default that the client constructor can override.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import warnings
|
|
7
|
+
|
|
8
|
+
import httpx2
|
|
9
|
+
|
|
10
|
+
from .__about__ import __version__
|
|
11
|
+
|
|
12
|
+
# endpoints & auth
DEFAULT_BASE_URL = "https://api.bls.gov/publicAPI/v2/"
# Environment variable read by resolve_api_key() when no key is passed explicitly.
API_KEY_ENV_VAR = "BLS_API_KEY"

# BLS asks clients to identify themselves.
DEFAULT_USER_AGENT = f"blsapi/{__version__}"
# Registration keys are 32 hex chars.
# resolve_api_key() only warns on a length mismatch; it does not reject the key.
EXPECTED_KEY_LENGTH = 32

# timeouts
# One value per phase (connect/read/write/pool); presumably seconds -- confirm against httpx2.
DEFAULT_TIMEOUT = httpx2.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0)

# application-level retry/backoff
DEFAULT_MAX_RETRIES = 3
BACKOFF_BASE = 0.5  # seconds; first backoff ceiling
BACKOFF_MAX = 30.0  # seconds; cap so jittered backoff can't explode
# Status codes treated as retryable (presumably consulted by the retry helper -- confirm).
RETRYABLE_STATUS = frozenset({429, 500, 502, 503, 504})

# BLS tier limits: (series_per_request, years_per_request, daily_queries)
# These are used for input validation and auto-batching. The limits differ when a registration key is present.
LIMITS_WITH_KEY = (50, 20, 500)
LIMITS_NO_KEY = (25, 10, 25)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def limits_for(api_key: str | None) -> tuple[int, int, int]:
    """Pick the tier limits that apply to ``api_key``.

    Returns ``LIMITS_WITH_KEY`` for a truthy key and ``LIMITS_NO_KEY`` otherwise
    (None or an empty string), as a
    (series_per_request, years_per_request, daily_queries) tuple.
    """
    if api_key:
        return LIMITS_WITH_KEY
    return LIMITS_NO_KEY
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def resolve_api_key(api_key: str | None) -> str | None:
|
|
42
|
+
"""Resolve the effective API key.
|
|
43
|
+
|
|
44
|
+
Warns when the key isn't the expected length.
|
|
45
|
+
"""
|
|
46
|
+
if api_key is None:
|
|
47
|
+
api_key = os.environ.get(API_KEY_ENV_VAR)
|
|
48
|
+
if api_key is not None and len(api_key) != EXPECTED_KEY_LENGTH:
|
|
49
|
+
warnings.warn(
|
|
50
|
+
f"BLS API key is {len(api_key)} characters; expected {EXPECTED_KEY_LENGTH}.",
|
|
51
|
+
stacklevel=3,
|
|
52
|
+
)
|
|
53
|
+
return api_key
|
blsapi/enums.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Status(str, Enum):
    """Values of the top-level ``status`` field in a BLS JSON envelope.

    Members are also ``str`` instances, so they compare equal to the raw
    strings returned by the API.
    """

    SUCCEEDED = "REQUEST_SUCCEEDED"
    NOT_PROCESSED = "REQUEST_NOT_PROCESSED"
    FAILED = "REQUEST_FAILED"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PeriodType(str, Enum):
    """Kind of BLS period, derived from the M/Q/S/A prefix of a period code.

    ANNUAL_AVERAGE is the odd one out: M13, Q05 and S03 are *computed averages*
    over the whole year rather than a point in calendar time, so they carry no
    real ``date`` (see frames.period_to_date).
    """

    MONTHLY = "monthly"
    QUARTERLY = "quarterly"
    SEMIANNUAL = "semiannual"
    ANNUAL = "annual"
    ANNUAL_AVERAGE = "annual_average"
|
blsapi/exceptions.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Exception hierarchy for the blsapi client."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BLSError(Exception):
    """Root of the library's exception hierarchy; every error it raises derives from this."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BLSValidationError(BLSError):
    """Invalid input detected locally by the library's own checks."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BLSAPIError(BLSError):
    """Raised when BLS handled the request but reported a non-success ``status``.

    The raw status string and the ``message`` list are kept on the exception
    (as ``status`` and ``messages``) so callers can inspect them.
    """

    def __init__(
        self,
        message: str,
        *,
        status: str | None = None,
        messages: list[str] | None = None,
    ) -> None:
        """Store the human-readable message plus the raw status and message list."""
        super().__init__(message)
        self.status = status
        # A missing or empty list is replaced by a fresh empty list.
        self.messages = messages if messages else []
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BLSHTTPError(BLSError):
    """A transport-level or HTTP-level failure that the retry loop could not recover from."""
|
blsapi/frames.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Shape BLS results into tidy Polars DataFrames.
|
|
2
|
+
|
|
3
|
+
period_to_date() handles the M/Q/S/A period-code -> calendar-date mapping, including
|
|
4
|
+
the annual-average trap (M13/Q05/S03 don't have a real date). series_to_frame() runs the flatten
|
|
5
|
+
+ value-coercion + date-construction pipeline, and pivot_wide() reshapes the long frame to one column per series.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import datetime as dt
|
|
9
|
+
from collections.abc import Mapping, Sequence
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
import polars as pl
|
|
13
|
+
|
|
14
|
+
from .enums import PeriodType
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from .models import SeriesResult
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def period_to_date(year: int, period: str) -> tuple[PeriodType, dt.date | None]:
    """Map a (year, BLS period code) to a (PeriodType, first-of-period date).

    The code is a one-letter prefix (M, Q, S or A) followed by digits; leading
    and trailing whitespace and letter case are ignored.

    Annual-average codes (M13, Q05, S03) classify as ANNUAL_AVERAGE and return
    a None date, because they are a computed average over the year rather than
    a moment in time. Keeping the date None lets callers filter them out
    cleanly.

    Args:
        year: Calendar year of the observation.
        period: BLS period code such as ``"M01"``, ``"Q3"``, ``"S02"`` or ``"A01"``.

    Returns:
        The period classification and the first day of the period (or None for
        annual averages).

    Raises:
        ValueError: On an unrecognized or malformed code.
    """
    code = period.strip().upper()
    kind, digits = code[:1], code[1:]

    # Validate before int(): a bare int() would leak its own "invalid literal"
    # message for codes like "M" or "MX", and would accept forms such as
    # "1_0" or " 1" that are not real period numbers.
    if not (digits.isascii() and digits.isdigit()):
        raise ValueError(f"Unrecognized BLS period code: {period!r}")
    number = int(digits)

    if kind == "M":
        if 1 <= number <= 12:
            return PeriodType.MONTHLY, dt.date(year, number, 1)
        if number == 13:
            return PeriodType.ANNUAL_AVERAGE, None
    elif kind == "Q":
        if 1 <= number <= 4:
            # Quarter start months: Q1->Jan, Q2->Apr, Q3->Jul, Q4->Oct.
            return PeriodType.QUARTERLY, dt.date(year, (number - 1) * 3 + 1, 1)
        if number == 5:
            return PeriodType.ANNUAL_AVERAGE, None
    elif kind == "S":
        if number in (1, 2):
            # Semiannual: S1->Jan (H1), S2->Jul (H2).
            return PeriodType.SEMIANNUAL, dt.date(year, 1 if number == 1 else 7, 1)
        if number == 3:
            return PeriodType.ANNUAL_AVERAGE, None
    elif kind == "A":
        # Any numeric suffix is accepted for annual codes, as before.
        return PeriodType.ANNUAL, dt.date(year, 1, 1)

    raise ValueError(f"Unrecognized BLS period code: {period!r}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Column names and dtypes used to build the frame when there are no rows,
# so an empty result still has a stable schema.
_EMPTY_SCHEMA = dict(
    series_id=pl.String,
    year=pl.Int64,
    period=pl.String,
    period_name=pl.String,
    period_type=pl.String,
    date=pl.Date,
    value=pl.Float64,
    latest=pl.Boolean,
    footnotes=pl.List,
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _as_bool(flag: object) -> bool:
    """Interpret a flag that may be a real bool or a string such as "true"/"false"."""
    if isinstance(flag, str):
        # bool("false") is True, so false-like strings must be recognised explicitly.
        return flag.strip().lower() not in ("", "false", "0", "no")
    return bool(flag)


def series_to_frame(
    series: "Sequence[SeriesResult]",
    *,
    parse_values: bool = True,
    aliases: "Mapping[str, str] | None" = None,
) -> pl.DataFrame:
    """Flatten BLS series results into one tidy/long Polars DataFrame.

    Each input SeriesResult has ``.series_id`` and ``.data`` (a list of raw BLS
    point dicts). Output is one row per (series, period), sorted by series then
    date with null dates last. When a point carries ``calculations``, its 1/3/6/12
    net and percent changes are added as ``net_change_{p}m`` / ``pct_change_{p}m``
    columns.

    Args:
        series: Series results to flatten.
        parse_values: When True (default) cast ``value`` and the calculation
            columns to Float64, turning unparseable entries into nulls; when
            False keep the raw BLS strings (mirrors blsR's parse_values=FALSE).
        aliases: Optional {bls_id: label} used to relabel the ``series_id``
            column. Relabelling happens before sorting.

    Returns:
        The long-format frame, or an empty frame with a fixed schema when there
        are no data points.

    Raises:
        KeyError: If a point lacks ``year`` or ``period``.
        ValueError: If a period code is not recognized by ``period_to_date``.
    """
    rows: list[dict] = []
    for result in series:
        for point in result.data:
            year = int(point["year"])
            period_type, date = period_to_date(year, point["period"])
            row = {
                "series_id": result.series_id,
                "year": year,
                "period": point["period"],
                "period_name": point.get("periodName"),
                "period_type": period_type.value,
                "date": date,
                "value": point.get("value"),
                # A plain bool() would turn the string "false" into True.
                "latest": _as_bool(point.get("latest", False)),
                # Footnotes are optional like the other fields; drop empty placeholders.
                "footnotes": [fn for fn in point.get("footnotes") or [] if fn],
            }
            if "calculations" in point:
                # Tolerate explicit nulls at either level of the calculations object.
                calc = point["calculations"] or {}
                net = calc.get("net_changes") or {}
                pct = calc.get("pct_changes") or {}
                for p in (1, 3, 6, 12):
                    row[f"net_change_{p}m"] = net.get(str(p))
                    row[f"pct_change_{p}m"] = pct.get(str(p))
            rows.append(row)

    if not rows:
        schema = dict(_EMPTY_SCHEMA)
        if not parse_values:
            schema["value"] = pl.String
        return pl.DataFrame(schema=schema)

    # infer_schema_length=None scans every row, so columns present on only
    # some rows (the calculation columns) are still picked up.
    frame = pl.DataFrame(rows, infer_schema_length=None).with_columns(pl.col("date").cast(pl.Date))

    if parse_values:
        calc_cols = [c for c in frame.columns if c.startswith(("net_change_", "pct_change_"))]
        frame = frame.with_columns(
            pl.col("value").str.strip_chars().cast(pl.Float64, strict=False),
            *(pl.col(c).cast(pl.Float64, strict=False) for c in calc_cols),
        )

    if aliases:
        frame = frame.with_columns(pl.col("series_id").replace(aliases))
    return frame.sort(["series_id", "date"], nulls_last=True)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def pivot_wide(df: pl.DataFrame) -> pl.DataFrame:
    """Reshape the long frame into one ``value`` column per series, keyed by date.

    Rows with a null date (the annual averages) are dropped before pivoting;
    pivot on (year, period) yourself if you need to keep them.
    """
    dated = df.filter(pl.col("date").is_not_null())
    wide = dated.pivot(values="value", index="date", on="series_id")
    return wide.sort("date")
|
blsapi/models.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Pydantic models:
|
|
2
|
+
1. SeriesRequest - validate inputs before sending the request.
|
|
3
|
+
2. BLSResponse - validate the response and raise typed errors on failure.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import warnings
|
|
9
|
+
from collections.abc import Mapping, Sequence
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
13
|
+
|
|
14
|
+
from .enums import Status
|
|
15
|
+
from .exceptions import BLSAPIError, BLSValidationError
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def normalize_series_ids(
    series_ids: str | Sequence[str] | Mapping[str, str],
) -> tuple[list[str], dict[str, str] | None]:
    """Turn any accepted ``series_ids`` shape into ``(bls_ids, aliases)``.

    A single id, a sequence of ids, or a {label: id} mapping is accepted. The
    first element of the result is the list of BLS ids to request. The second
    is a {bls_id: label} map (the inverse of the given mapping) used to relabel
    the output, or None when no mapping was supplied.

    This mirrors blsR's named-list feature. Raises BLSValidationError on empty
    input, blank ids, or a mapping that assigns two labels to the same id.
    """
    if isinstance(series_ids, Mapping):
        if not series_ids:
            raise BLSValidationError("series_ids mapping must not be empty")
        ids = list(series_ids.values())
        _check_ids(ids)
        # Invert label -> id into id -> label; duplicate ids collapse here.
        aliases: dict[str, str] = {}
        for label, bls_id in series_ids.items():
            aliases[bls_id] = label
        if len(aliases) != len(ids):
            raise BLSValidationError("series_ids mapping maps two labels to the same id")
        return ids, aliases

    if isinstance(series_ids, str):
        # A lone string is one id, not an iterable of characters.
        ids = [series_ids]
    else:
        ids = list(series_ids)
        if not ids:
            raise BLSValidationError("series_ids must not be empty")
    _check_ids(ids)
    return ids, None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _check_ids(ids: list[str]) -> None:
    """Raise BLSValidationError unless every entry is a string with non-whitespace content."""
    for candidate in ids:
        if not isinstance(candidate, str) or not candidate.strip():
            raise BLSValidationError("every series id must be a non-empty string")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class SeriesRequest(BaseModel):
    """Validated inputs for a POST timeseries/data request."""

    series_ids: list[str]
    start_year: int | None = None
    end_year: int | None = None
    catalog: bool = False
    calculations: bool = False
    annual_average: bool = False
    aspects: bool = False
    latest: bool = False

    @model_validator(mode="after")
    def _validate(self) -> "SeriesRequest":
        """Reject an empty id list and a year range whose start is after its end."""
        if not self.series_ids:
            raise BLSValidationError("series_ids must not be empty")
        first, last = self.start_year, self.end_year
        if first is not None and last is not None and first > last:
            raise BLSValidationError("start_year must be <= end_year")
        return self

    def to_payload(self, api_key: str | None) -> dict[str, Any]:
        """Build the minimal JSON body for the request.

        Boolean options are only included when enabled, years are sent as
        strings, and ``registrationkey`` is added when ``api_key`` is not None.
        """
        body: dict = {"seriesid": self.series_ids}

        # (model attribute, BLS payload key), in the order the keys are emitted.
        switches = (
            ("catalog", "catalog"),
            ("calculations", "calculations"),
            ("annual_average", "annualaverage"),
            ("aspects", "aspects"),
            ("latest", "latest"),
        )
        for attribute, key in switches:
            if getattr(self, attribute):
                body[key] = True

        if self.start_year is not None:
            body["startyear"] = str(self.start_year)
        if self.end_year is not None:
            body["endyear"] = str(self.end_year)
        if api_key is not None:
            body["registrationkey"] = api_key

        return body
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class SeriesResult(BaseModel):
    """A single series entry from the response."""

    # Accept the field name (series_id) as well as the alias (seriesID).
    model_config = ConfigDict(populate_by_name=True)

    # Series identifier; populated from the "seriesID" key.
    series_id: str = Field(alias="seriesID")
    # Optional catalog object for the series, kept as a plain dict.
    catalog: dict[str, Any] | None = None
    # Raw data-point dicts; defaults to an empty list.
    data: list[dict[str, Any]] = Field(default_factory=list)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class Results(BaseModel):
    """The ``Results`` object of a response: a list of series entries."""

    # Defaults to an empty list when no series are present.
    series: list[SeriesResult] = Field(default_factory=list)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class BLSResponse(BaseModel):
    """Top-level response envelope; validation also performs the status check."""

    # Accept field names as well as the aliases (responseTime, Results).
    model_config = ConfigDict(populate_by_name=True)

    status: str
    response_time: int | None = Field(default=None, alias="responseTime")
    message: list[str] = Field(default_factory=list)
    results: Results | None = Field(default=None, alias="Results")

    @model_validator(mode="after")
    def validate_response(self) -> BLSResponse:
        """Raise BLSAPIError on a non-success status; otherwise surface messages as warnings."""
        if self.status == Status.SUCCEEDED:
            # A successful response can still carry messages; emit each as a warning.
            for note in self.message:
                warnings.warn(note, stacklevel=2)
            return self

        # Fall back to the raw status when BLS supplied no message text.
        summary = "; ".join(self.message) or self.status
        raise BLSAPIError(summary, status=self.status, messages=self.message)

    @property
    def series(self) -> list[SeriesResult]:
        """Shortcut to ``results.series``; an empty list when ``results`` is None."""
        if self.results is None:
            return []
        return self.results.series
|
blsapi/py.typed
ADDED
|
File without changes
|