sdgdata 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdgdata-0.1.0/LICENSE +21 -0
- sdgdata-0.1.0/MANIFEST.in +2 -0
- sdgdata-0.1.0/PKG-INFO +114 -0
- sdgdata-0.1.0/README.md +88 -0
- sdgdata-0.1.0/pyproject.toml +88 -0
- sdgdata-0.1.0/setup.cfg +4 -0
- sdgdata-0.1.0/src/sdgdata/__init__.py +5 -0
- sdgdata-0.1.0/src/sdgdata/client.py +305 -0
- sdgdata-0.1.0/src/sdgdata/debug.py +36 -0
- sdgdata-0.1.0/src/sdgdata/metadata.py +28 -0
- sdgdata-0.1.0/src/sdgdata/models.py +374 -0
- sdgdata-0.1.0/src/sdgdata/utilities.py +156 -0
- sdgdata-0.1.0/src/sdgdata.egg-info/PKG-INFO +114 -0
- sdgdata-0.1.0/src/sdgdata.egg-info/SOURCES.txt +15 -0
- sdgdata-0.1.0/src/sdgdata.egg-info/dependency_links.txt +1 -0
- sdgdata-0.1.0/src/sdgdata.egg-info/requires.txt +2 -0
- sdgdata-0.1.0/src/sdgdata.egg-info/top_level.txt +1 -0
sdgdata-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vassily Trubetskoy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sdgdata-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sdgdata
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Simple Python client for the United Nations Statistics Division SDG API
|
|
5
|
+
Author-email: Vassily Trubetskoy <3219751+v-a-s-a@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/v-a-s-a/sdgdata
|
|
8
|
+
Project-URL: Repository, https://github.com/v-a-s-a/sdgdata
|
|
9
|
+
Project-URL: Issues, https://github.com/v-a-s-a/sdgdata/issues
|
|
10
|
+
Project-URL: Documentation, https://github.com/v-a-s-a/sdgdata#readme
|
|
11
|
+
Keywords: sdg,statistics,sustainable-development-goals,united-nations,unsd
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: httpx>=0.28.1
|
|
24
|
+
Requires-Dist: pydantic>=2.12.3
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# sdgdata
|
|
28
|
+
|
|
29
|
+
`sdgdata` is an unofficial Python client for SDG data from the UNSD SDG API.
|
|
30
|
+
|
|
31
|
+
It provides a simple client to retrieve Sustainable Development Goal metadata
|
|
32
|
+
and data from Python, with models derived from the UNSD SDG API schema.
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
|
|
36
|
+
- Fetch SDG goals, targets, indicators, and series metadata.
|
|
37
|
+
- Look up geographic areas and M49 area codes.
|
|
38
|
+
- Retrieve paginated SDG series observations with simple Python calls.
|
|
39
|
+
- Validate structured API responses with Pydantic models.
|
|
40
|
+
- Load observation data into analysis tools such as pandas or Polars.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
Install from PyPI:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
uv add sdgdata
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
or:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install sdgdata
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from sdgdata import SDGClient
|
|
60
|
+
from sdgdata.client import is_single_time_series
|
|
61
|
+
|
|
62
|
+
client = SDGClient()
|
|
63
|
+
|
|
64
|
+
# Find available geographic areas and SDG targets.
|
|
65
|
+
areas = client.get_geo_areas()
|
|
66
|
+
targets = client.get_targets()
|
|
67
|
+
|
|
68
|
+
# Find latest-release series codes for a target.
|
|
69
|
+
series = client.get_series_codes(target_code="3.8")
|
|
70
|
+
series_code = series[-1].code
|
|
71
|
+
area_code = areas[0].geoAreaCode
|
|
72
|
+
|
|
73
|
+
# Inspect available disaggregation dimensions for a series.
|
|
74
|
+
dimensions = client.get_series_dimensions(series_code)
|
|
75
|
+
|
|
76
|
+
# Fetch the coarsest available disaggregation by default.
|
|
77
|
+
data = client.get_series_data(
|
|
78
|
+
series_codes=[series_code],
|
|
79
|
+
area_code=area_code,
|
|
80
|
+
start_period="2015",
|
|
81
|
+
end_period="2026",
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
assert is_single_time_series(data)
|
|
85
|
+
|
|
86
|
+
# To fetch every disaggregation, opt into the unfiltered API response.
|
|
87
|
+
all_disaggregations = client.get_series_data(
|
|
88
|
+
series_codes=[series_code],
|
|
89
|
+
area_code=area_code,
|
|
90
|
+
start_period="2015",
|
|
91
|
+
end_period="2026",
|
|
92
|
+
dimensions="all",
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# Or request a specific disaggregation slice.
|
|
96
|
+
custom_slice = client.get_series_data(
|
|
97
|
+
series_codes=[series_code],
|
|
98
|
+
area_code=area_code,
|
|
99
|
+
dimensions={"Reporting Type": "G"},
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
`get_series_data()` returns a list of dictionaries, making it straightforward
|
|
104
|
+
to create a dataframe for analysis. It normalizes singleton `goal`, `target`,
|
|
105
|
+
and `indicator` arrays to strings, and integral `timePeriodStart` values to
|
|
106
|
+
integers. By default, it filters to the coarsest available disaggregation, such
|
|
107
|
+
as all ages, both sexes, and total groups when those dimension values exist.
|
|
108
|
+
For the upstream observation field descriptions, see the
|
|
109
|
+
[UNSD SDG API Swagger documentation](https://unstats.un.org/sdgapi/swagger/).
|
|
110
|
+
|
|
111
|
+
## Documentation
|
|
112
|
+
|
|
113
|
+
- [Development](docs/development.md): setup, tests, fixture refreshes, builds, and CI behavior.
|
|
114
|
+
- [Model generation](docs/model-generation.md): generated models, stale checks, and OpenAPI source data.
|
sdgdata-0.1.0/README.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# sdgdata
|
|
2
|
+
|
|
3
|
+
`sdgdata` is an unofficial Python client for SDG data from the UNSD SDG API.
|
|
4
|
+
|
|
5
|
+
It provides a simple client to retrieve Sustainable Development Goal metadata
|
|
6
|
+
and data from Python, with models derived from the UNSD SDG API schema.
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- Fetch SDG goals, targets, indicators, and series metadata.
|
|
11
|
+
- Look up geographic areas and M49 area codes.
|
|
12
|
+
- Retrieve paginated SDG series observations with simple Python calls.
|
|
13
|
+
- Validate structured API responses with Pydantic models.
|
|
14
|
+
- Load observation data into analysis tools such as pandas or Polars.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
Install from PyPI:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
uv add sdgdata
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
or:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install sdgdata
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from sdgdata import SDGClient
|
|
34
|
+
from sdgdata.client import is_single_time_series
|
|
35
|
+
|
|
36
|
+
client = SDGClient()
|
|
37
|
+
|
|
38
|
+
# Find available geographic areas and SDG targets.
|
|
39
|
+
areas = client.get_geo_areas()
|
|
40
|
+
targets = client.get_targets()
|
|
41
|
+
|
|
42
|
+
# Find latest-release series codes for a target.
|
|
43
|
+
series = client.get_series_codes(target_code="3.8")
|
|
44
|
+
series_code = series[-1].code
|
|
45
|
+
area_code = areas[0].geoAreaCode
|
|
46
|
+
|
|
47
|
+
# Inspect available disaggregation dimensions for a series.
|
|
48
|
+
dimensions = client.get_series_dimensions(series_code)
|
|
49
|
+
|
|
50
|
+
# Fetch the coarsest available disaggregation by default.
|
|
51
|
+
data = client.get_series_data(
|
|
52
|
+
series_codes=[series_code],
|
|
53
|
+
area_code=area_code,
|
|
54
|
+
start_period="2015",
|
|
55
|
+
end_period="2026",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
assert is_single_time_series(data)
|
|
59
|
+
|
|
60
|
+
# To fetch every disaggregation, opt into the unfiltered API response.
|
|
61
|
+
all_disaggregations = client.get_series_data(
|
|
62
|
+
series_codes=[series_code],
|
|
63
|
+
area_code=area_code,
|
|
64
|
+
start_period="2015",
|
|
65
|
+
end_period="2026",
|
|
66
|
+
dimensions="all",
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Or request a specific disaggregation slice.
|
|
70
|
+
custom_slice = client.get_series_data(
|
|
71
|
+
series_codes=[series_code],
|
|
72
|
+
area_code=area_code,
|
|
73
|
+
dimensions={"Reporting Type": "G"},
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
`get_series_data()` returns a list of dictionaries, making it straightforward
|
|
78
|
+
to create a dataframe for analysis. It normalizes singleton `goal`, `target`,
|
|
79
|
+
and `indicator` arrays to strings, and integral `timePeriodStart` values to
|
|
80
|
+
integers. By default, it filters to the coarsest available disaggregation, such
|
|
81
|
+
as all ages, both sexes, and total groups when those dimension values exist.
|
|
82
|
+
For the upstream observation field descriptions, see the
|
|
83
|
+
[UNSD SDG API Swagger documentation](https://unstats.un.org/sdgapi/swagger/).
|
|
84
|
+
|
|
85
|
+
## Documentation
|
|
86
|
+
|
|
87
|
+
- [Development](docs/development.md): setup, tests, fixture refreshes, builds, and CI behavior.
|
|
88
|
+
- [Model generation](docs/model-generation.md): generated models, stale checks, and OpenAPI source data.
|
sdgdata-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sdgdata"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Simple Python client for the United Nations Statistics Division SDG API"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Vassily Trubetskoy", email = "3219751+v-a-s-a@users.noreply.github.com" },
|
|
14
|
+
]
|
|
15
|
+
requires-python = ">=3.12"
|
|
16
|
+
keywords = [
|
|
17
|
+
"sdg",
|
|
18
|
+
"statistics",
|
|
19
|
+
"sustainable-development-goals",
|
|
20
|
+
"united-nations",
|
|
21
|
+
"unsd",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Developers",
|
|
26
|
+
"Intended Audience :: Science/Research",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
31
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
32
|
+
]
|
|
33
|
+
dependencies = [
|
|
34
|
+
"httpx>=0.28.1",
|
|
35
|
+
"pydantic>=2.12.3",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Homepage = "https://github.com/v-a-s-a/sdgdata"
|
|
40
|
+
Repository = "https://github.com/v-a-s-a/sdgdata"
|
|
41
|
+
Issues = "https://github.com/v-a-s-a/sdgdata/issues"
|
|
42
|
+
Documentation = "https://github.com/v-a-s-a/sdgdata#readme"
|
|
43
|
+
|
|
44
|
+
[dependency-groups]
|
|
45
|
+
dev = [
|
|
46
|
+
"openapi-python-client==0.28.4",
|
|
47
|
+
"pytest>=8.0.0",
|
|
48
|
+
"respx>=0.22.0",
|
|
49
|
+
"ruff>=0.8.0",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tool.pytest.ini_options]
|
|
53
|
+
markers = [
|
|
54
|
+
"mock: deterministic tests that mock the UNSD API",
|
|
55
|
+
"live: tests that call the live UNSD API",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[tool.ruff]
|
|
59
|
+
target-version = "py312"
|
|
60
|
+
line-length = 100
|
|
61
|
+
src = ["src", "tests", "scripts"]
|
|
62
|
+
exclude = [
|
|
63
|
+
"generated/openapi_python_client",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.ruff.lint]
|
|
67
|
+
select = [
|
|
68
|
+
"E",
|
|
69
|
+
"F",
|
|
70
|
+
"I",
|
|
71
|
+
"UP",
|
|
72
|
+
"B",
|
|
73
|
+
"SIM",
|
|
74
|
+
"RUF",
|
|
75
|
+
]
|
|
76
|
+
ignore = []
|
|
77
|
+
|
|
78
|
+
[tool.ruff.format]
|
|
79
|
+
quote-style = "double"
|
|
80
|
+
indent-style = "space"
|
|
81
|
+
line-ending = "auto"
|
|
82
|
+
|
|
83
|
+
[tool.setuptools]
|
|
84
|
+
package-dir = {"" = "src"}
|
|
85
|
+
|
|
86
|
+
[tool.setuptools.packages.find]
|
|
87
|
+
where = ["src"]
|
|
88
|
+
include = ["sdgdata*"]
|
sdgdata-0.1.0/setup.cfg
ADDED
sdgdata-0.1.0/src/sdgdata/__init__.py
ADDED
sdgdata-0.1.0/src/sdgdata/client.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
from collections.abc import Mapping, Sequence
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
from sdgdata.models import (
|
|
6
|
+
ApiDimension,
|
|
7
|
+
ApiGeoArea,
|
|
8
|
+
ApiGoal,
|
|
9
|
+
ApiIndicator,
|
|
10
|
+
ApiSerie,
|
|
11
|
+
ApiTarget,
|
|
12
|
+
ConceptsMasterData,
|
|
13
|
+
SDMXMetaDataResponse,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
from . import debug
|
|
17
|
+
from .metadata import IndicatorSeriesMetadata, SeriesMetadata, TargetSeriesMetadata
|
|
18
|
+
from .utilities import (
|
|
19
|
+
DimensionArgument,
|
|
20
|
+
_coarsest_dimension_filters,
|
|
21
|
+
_dimension_payload,
|
|
22
|
+
_normalize_observation,
|
|
23
|
+
_period_query_params,
|
|
24
|
+
_release_sort_key,
|
|
25
|
+
_series_code_list,
|
|
26
|
+
)
|
|
27
|
+
from .utilities import (
|
|
28
|
+
is_single_time_series as is_single_time_series,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Base URL of the UNSD SDG API (v1); relative request paths are resolved against it.
|
|
32
|
+
BASE_URL = "https://unstats.un.org/sdgapi/v1"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _group_series_metadata(
    series_items: list[ApiSerie],
    dimensions_by_code: Mapping[str, list[ApiDimension]],
) -> list[SeriesMetadata]:
    """
    Collapses per-release UNSD series rows into one discovery record per series code.

    Rows without a code are skipped. Each record takes its description, URI and
    latest release from the row with the highest release sort key, lists every
    distinct non-null release in sort-key order, and carries the dimensions found
    under its code in ``dimensions_by_code`` (empty when the code is absent).
    The result is ordered by series code.
    """
    rows_by_code: dict[str, list[ApiSerie]] = {}
    for row in series_items:
        if row.code is not None:
            rows_by_code.setdefault(row.code, []).append(row)

    records = []
    # Codes are unique dictionary keys, so walking them in sorted order yields the
    # records already ordered by code.
    for code in sorted(rows_by_code):
        rows = rows_by_code[code]
        newest = max(rows, key=lambda row: _release_sort_key(row.release))
        distinct_releases = {row.release for row in rows if row.release is not None}
        records.append(
            SeriesMetadata(
                code=code,
                description=newest.description,
                uri=newest.uri,
                latest_release=newest.release,
                releases=sorted(distinct_releases, key=_release_sort_key),
                dimensions=dimensions_by_code.get(code, []),
            )
        )

    return records
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _indicator_series_metadata(
    indicator: ApiIndicator,
    dimensions_by_code: Mapping[str, list[ApiDimension]],
) -> IndicatorSeriesMetadata:
    """
    Converts one generated UNSD indicator model into the public discovery model.

    A missing indicator code becomes an empty string and a missing series list is
    treated as empty; the series rows are grouped per series code.
    """
    indicator_code = indicator.code if indicator.code else ""
    raw_series = indicator.series if indicator.series else []
    grouped_series = _group_series_metadata(raw_series, dimensions_by_code)
    return IndicatorSeriesMetadata(
        code=indicator_code,
        description=indicator.description,
        tier=indicator.tier,
        uri=indicator.uri,
        series=grouped_series,
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SDGClient:
    """
    Synchronous client for the UNSD SDG API.

    Owns an ``httpx.Client`` configured with ``BASE_URL`` and a 30 second timeout.
    Call ``close()`` or use the instance as a context manager to release it.
    Every request method raises ``httpx.HTTPStatusError`` on an error status.
    """

    def __init__(self):
        self.client = httpx.Client(base_url=BASE_URL, timeout=30.0)

    def __enter__(self) -> "SDGClient":
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def close(self) -> None:
        """
        Closes the underlying HTTP client.
        """
        self.client.close()

    def _get(self, url: str, *, params: dict | None = None) -> httpx.Response:
        """
        Sends a GET request after passing the built request to the debug hook.
        """
        # Build the request first so the debug hook sees the final URL and query string.
        request = self.client.build_request("GET", url, params=params)
        debug.print_query(request)
        return self.client.send(request)

    def get_targets(self, include_children: bool = False) -> list[ApiTarget]:
        """
        Fetches all targets, optionally including their indicators and series.
        """
        response = self._get("/sdg/Target/List", params={"includechildren": include_children})
        response.raise_for_status()
        return [ApiTarget(**item) for item in response.json()]

    def get_series_codes(
        self, target_code: str | None = None, *, all_releases: bool = False
    ) -> list[ApiSerie]:
        """
        Returns series rows, optionally filtered by target code.

        By default only the row with the highest release sort key is kept for each
        series code, and rows without a code are dropped. With ``all_releases=True``
        every row is returned unchanged, including repeated codes.
        """
        series_list: list[ApiSerie] = []
        for target in self.get_targets(include_children=True):
            if target_code and target.code != target_code:
                continue
            for indicator in target.indicators or []:
                series_list.extend(indicator.series or [])
        if all_releases:
            return series_list

        latest_by_code: dict[str, ApiSerie] = {}
        for series in series_list:
            if series.code is None:
                continue
            current = latest_by_code.get(series.code)
            if current is None or _release_sort_key(series.release) > _release_sort_key(
                current.release
            ):
                latest_by_code[series.code] = series

        return list(latest_by_code.values())

    def get_indicator_series(self, indicator_code: str) -> IndicatorSeriesMetadata:
        """
        Returns grouped series metadata and dimensions for an indicator.

        Issues one dimensions request per distinct series code of the indicator.
        Raises ``ValueError`` when no target lists the indicator code.
        """
        for target in self.get_targets(include_children=True):
            for indicator in target.indicators or []:
                if indicator.code != indicator_code:
                    continue
                series_codes = {
                    series.code for series in indicator.series or [] if series.code is not None
                }
                dimensions_by_code = {
                    code: self.get_series_dimensions(code) for code in series_codes
                }
                return _indicator_series_metadata(indicator, dimensions_by_code)

        raise ValueError(f"indicator code {indicator_code!r} was not found")

    def get_target_series(self, target_code: str) -> TargetSeriesMetadata:
        """
        Returns grouped indicator and series metadata for a target.

        Issues one dimensions request per distinct series code under the target.
        Raises ``ValueError`` when the target code is not found.
        """
        for target in self.get_targets(include_children=True):
            if target.code != target_code:
                continue

            series_codes = {
                series.code
                for indicator in target.indicators or []
                for series in indicator.series or []
                if series.code is not None
            }
            dimensions_by_code = {code: self.get_series_dimensions(code) for code in series_codes}
            return TargetSeriesMetadata(
                code=target.code or "",
                title=target.title,
                description=target.description,
                uri=target.uri,
                indicators=[
                    _indicator_series_metadata(indicator, dimensions_by_code)
                    for indicator in target.indicators or []
                ],
            )

        raise ValueError(f"target code {target_code!r} was not found")

    def get_geo_areas(self) -> list[ApiGeoArea]:
        """
        Returns a list of geographic areas and their M49 codes.
        """
        response = self._get("/sdg/GeoArea/List")
        response.raise_for_status()
        return [ApiGeoArea(**item) for item in response.json()]

    def get_goals(self) -> list[ApiGoal]:
        """
        Fetches all SDG goals.
        """
        response = self._get("/sdg/Goal/List")
        response.raise_for_status()
        return [ApiGoal(**item) for item in response.json()]

    def get_indicators(self, include_series: bool = True) -> list[ApiTarget]:
        """
        Fetches all indicators, optionally including their series.
        """
        response = self._get("/sdg/Indicator/List", params={"includechildren": include_series})
        response.raise_for_status()
        # NOTE(review): the Indicator endpoint is parsed into ApiTarget rather than
        # ApiIndicator, which looks like a copy-paste slip from get_targets(). It is kept
        # as-is because switching models would change the public return type - confirm
        # against the generated models before changing it.
        return [ApiTarget(**item) for item in response.json()]

    def get_concepts(self) -> list[ConceptsMasterData]:
        """
        Fetches all concepts.
        """
        response = self._get("/sdg/SDMXMetadata/GetConceptsMasterList")
        response.raise_for_status()
        return [ConceptsMasterData(**item) for item in response.json()]

    def get_sdmx_series(self) -> list[SDMXMetaDataResponse]:
        """
        Fetches all SDMX series metadata.
        """
        response = self._get("/sdg/SDMXMetadata/GetSeries")
        response.raise_for_status()
        return [SDMXMetaDataResponse(**item) for item in response.json()]

    def get_series_dimensions(self, series_code: str) -> list[ApiDimension]:
        """
        Fetches available disaggregation dimensions for a series.
        """
        response = self._get(f"/sdg/Series/{series_code}/Dimensions")
        response.raise_for_status()
        return [ApiDimension(**item) for item in response.json()]

    def get_series_data(
        self,
        series_codes: Sequence[str],
        area_code: str | None = None,
        start_period: str | None = None,
        end_period: str | None = None,
        release_code: str | None = None,
        dimensions: DimensionArgument = "coarsest",
    ) -> list[dict]:
        """
        Pulls actual data observations for given series codes across all pages.

        Returns a list of dictionaries, making it easy to create a Polars or Pandas
        DataFrame.

        ``dimensions`` selects the disaggregation:

        - ``"coarsest"`` (default): each series is fetched separately, filtered by the
          values ``_coarsest_dimension_filters`` picks from that series' dimensions.
        - ``"all"``: no dimension filter is sent.
        - a mapping: sent as the dimension filter for all series in a single query.

        Any other ``dimensions`` value raises ``ValueError`` once the query is built.
        """
        series_codes = _series_code_list(series_codes)
        params: dict = {"pageSize": 1000}
        if area_code:
            params["areaCode"] = area_code
        if release_code:
            params["releaseCode"] = release_code
        time_period_params = _period_query_params(start_period, end_period)
        # An empty mapping (as opposed to None) short-circuits without calling the API;
        # presumably the helper uses it to signal that no period can match - TODO confirm.
        if time_period_params == {}:
            return []
        if time_period_params is not None:
            params.update(time_period_params)

        if dimensions == "coarsest":
            all_observations = []
            for series_code in series_codes:
                series_params = {**params, "seriesCode": series_code}
                coarsest_dimensions = _coarsest_dimension_filters(
                    self.get_series_dimensions(series_code)
                )
                if coarsest_dimensions:
                    series_params["dimensions"] = _dimension_payload(coarsest_dimensions)
                all_observations.extend(self._fetch_series_data(series_params))
            return all_observations

        params["seriesCode"] = ",".join(series_codes)
        if dimensions != "all":
            if not isinstance(dimensions, Mapping):
                raise ValueError('dimensions must be "coarsest", "all", or a mapping')
            params["dimensions"] = _dimension_payload(dimensions)

        return self._fetch_series_data(params)

    def _fetch_series_data(self, params: dict) -> list[dict]:
        """
        Fetches every page of ``/sdg/Series/Data`` for ``params`` and normalizes each row.

        The caller's ``params`` mapping is not mutated.
        """
        time_periods = params.get("timePeriod")
        # With a dimensions filter, several time periods are fetched one request series
        # per period. NOTE(review): presumably the API does not combine multiple
        # timePeriod values with a dimensions filter - confirm.
        if isinstance(time_periods, list) and len(time_periods) > 1 and "dimensions" in params:
            all_observations = []
            for time_period in time_periods:
                period_params = {**params, "timePeriod": [time_period]}
                all_observations.extend(self._fetch_series_data(period_params))
            return all_observations

        params = {**params, "page": 1}
        all_observations = []

        while True:
            # /sdg/Series/Data is often more direct than generic Observation for this
            response = self._get("/sdg/Series/Data", params=params)
            response.raise_for_status()

            page_data = response.json()
            # "data" may be missing or null; both are treated as an empty page.
            observations = page_data.get("data") or []
            all_observations.extend(_normalize_observation(item) for item in observations)

            total_pages = page_data.get("totalPages")
            if total_pages is not None and params["page"] >= int(total_pages):
                break

            # UNSD may omit totalPages, so fall back to response size.
            if not observations or len(observations) < params.get("pageSize", 100):
                break

            params["page"] += 1

        return all_observations
|
sdgdata-0.1.0/src/sdgdata/debug.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
# Module-level switch read by print_query(); toggled through enable() and disable().
_enabled = False
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def enable() -> None:
    """
    Turns on debug query output by setting the module-level flag to True.

    The flag is module state, so it applies to every sdgdata client in this process.
    """
    global _enabled
    _enabled = True
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def disable() -> None:
    """
    Turns off debug query output by setting the module-level flag to False.

    The flag is module state, so it applies to every sdgdata client in this process.
    """
    global _enabled
    _enabled = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def is_enabled() -> bool:
    """
    Reports the current value of the module-level debug output flag.
    """
    return _enabled
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def print_query(request: httpx.Request) -> None:
    """
    Writes the request's full URL to stderr when debug output is enabled.

    Does nothing while the module-level flag is off.
    """
    if not _enabled:
        return
    print(f"sdgdata query: {request.url}", file=sys.stderr)
|
sdgdata-0.1.0/src/sdgdata/metadata.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
from sdgdata.models import ApiDimension
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SeriesMetadata(BaseModel):
    """
    Discovery record for one series code, combining its release rows.
    """

    # Series code identifying the record (required).
    code: str
    # Description of the series, when available.
    description: str | None = None
    # URI of the series, when available.
    uri: str | None = None
    # Release code of the most recent release, when known.
    latest_release: str | None = None
    # Release codes in which the series appears; empty by default.
    releases: list[str] = Field(default_factory=list)
    # Disaggregation dimensions available for the series; empty by default.
    dimensions: list[ApiDimension] = Field(default_factory=list)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class IndicatorSeriesMetadata(BaseModel):
    """
    Discovery record for one indicator and the series grouped under it.
    """

    # Indicator code (required).
    code: str
    # Description of the indicator, when available.
    description: str | None = None
    # Tier value reported for the indicator, when available.
    tier: str | None = None
    # URI of the indicator, when available.
    uri: str | None = None
    # One record per series code belonging to the indicator; empty by default.
    series: list[SeriesMetadata] = Field(default_factory=list)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TargetSeriesMetadata(BaseModel):
    """
    Discovery record for one target and the indicators grouped under it.
    """

    # Target code (required).
    code: str
    # Title of the target, when available.
    title: str | None = None
    # Description of the target, when available.
    description: str | None = None
    # URI of the target, when available.
    uri: str | None = None
    # One record per indicator belonging to the target; empty by default.
    indicators: list[IndicatorSeriesMetadata] = Field(default_factory=list)
|