parsimony-sdmx 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsimony_sdmx/__init__.py +43 -0
- parsimony_sdmx/_catalog_planning.py +95 -0
- parsimony_sdmx/_legacy_sdmx.py +794 -0
- parsimony_sdmx/cli/__init__.py +1 -0
- parsimony_sdmx/cli/args.py +126 -0
- parsimony_sdmx/cli/layout.py +41 -0
- parsimony_sdmx/cli/listing.py +172 -0
- parsimony_sdmx/cli/main.py +183 -0
- parsimony_sdmx/cli/memory_monitor.py +225 -0
- parsimony_sdmx/cli/orchestrator.py +345 -0
- parsimony_sdmx/cli/orphan_sweep.py +32 -0
- parsimony_sdmx/cli/summary.py +38 -0
- parsimony_sdmx/cli/worker.py +80 -0
- parsimony_sdmx/connectors/__init__.py +41 -0
- parsimony_sdmx/connectors/_agencies.py +44 -0
- parsimony_sdmx/connectors/enumerate_datasets.py +110 -0
- parsimony_sdmx/connectors/enumerate_series.py +128 -0
- parsimony_sdmx/connectors/fetch.py +166 -0
- parsimony_sdmx/core/__init__.py +5 -0
- parsimony_sdmx/core/codelists.py +50 -0
- parsimony_sdmx/core/errors.py +27 -0
- parsimony_sdmx/core/models.py +17 -0
- parsimony_sdmx/core/outcomes.py +47 -0
- parsimony_sdmx/core/projection.py +87 -0
- parsimony_sdmx/core/titles.py +55 -0
- parsimony_sdmx/io/__init__.py +1 -0
- parsimony_sdmx/io/html.py +38 -0
- parsimony_sdmx/io/http.py +198 -0
- parsimony_sdmx/io/parquet.py +201 -0
- parsimony_sdmx/io/paths.py +47 -0
- parsimony_sdmx/io/xml.py +116 -0
- parsimony_sdmx/providers/__init__.py +1 -0
- parsimony_sdmx/providers/agencies.py +15 -0
- parsimony_sdmx/providers/ecb.py +139 -0
- parsimony_sdmx/providers/ecb_portal.py +224 -0
- parsimony_sdmx/providers/ecb_series_attrs.py +81 -0
- parsimony_sdmx/providers/estat.py +23 -0
- parsimony_sdmx/providers/imf.py +23 -0
- parsimony_sdmx/providers/protocol.py +26 -0
- parsimony_sdmx/providers/registry.py +33 -0
- parsimony_sdmx/providers/sdmx_client.py +38 -0
- parsimony_sdmx/providers/sdmx_extract.py +153 -0
- parsimony_sdmx/providers/sdmx_flow.py +152 -0
- parsimony_sdmx/providers/wb.py +355 -0
- parsimony_sdmx-0.2.0.dist-info/METADATA +145 -0
- parsimony_sdmx-0.2.0.dist-info/RECORD +49 -0
- parsimony_sdmx-0.2.0.dist-info/WHEEL +4 -0
- parsimony_sdmx-0.2.0.dist-info/entry_points.txt +5 -0
- parsimony_sdmx-0.2.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""``parsimony-sdmx`` — SDMX connector plugin for the ``parsimony`` kernel.
|
|
2
|
+
|
|
3
|
+
Exports:
|
|
4
|
+
|
|
5
|
+
- :data:`CONNECTORS` — the plugin surface discovered via the
|
|
6
|
+
``parsimony.providers`` entry point group. Three items: two enumerators
|
|
7
|
+
(dataset-level + per-dataset series) and one live fetch connector. Both
|
|
8
|
+
enumerators carry a ``catalog=`` declaration so ``parsimony bundles``
|
|
9
|
+
drives publish; SDMX no longer ships its own bundle CLI.
|
|
10
|
+
- :data:`ENV_VARS` — empty. SDMX endpoints are public.
|
|
11
|
+
- :data:`PROVIDER_METADATA` — bundle topology and supported agencies.
|
|
12
|
+
|
|
13
|
+
Discovery is driven by the kernel via entry points declared in
|
|
14
|
+
``pyproject.toml``. No manual registration required — users ``pip install
|
|
15
|
+
parsimony-sdmx`` and the plugin appears in ``parsimony list-plugins``.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from parsimony_sdmx.connectors import CONNECTORS, ENV_VARS
|
|
23
|
+
from parsimony_sdmx.connectors._agencies import ALL_AGENCIES
|
|
24
|
+
|
|
25
|
+
# Plugin version string; repeated inside PROVIDER_METADATA below.
__version__ = "0.2.0"


# Static description of the plugin: the agency identifiers it supports, the
# namespace templates its bundles use, and the plugin version.
PROVIDER_METADATA: dict[str, Any] = {
    # One entry per member of ALL_AGENCIES, in its iteration order.
    "agencies": [agency.value for agency in ALL_AGENCIES],
    # A fixed dataset-level namespace, plus a per-dataset series template
    # with ``{agency}`` / ``{dataset_id}`` placeholders.
    "namespace_templates": ["sdmx_datasets", "sdmx_series_{agency}_{dataset_id}"],
    "plugin_version": __version__,
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"CONNECTORS",
|
|
40
|
+
"ENV_VARS",
|
|
41
|
+
"PROVIDER_METADATA",
|
|
42
|
+
"__version__",
|
|
43
|
+
]
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Plan generators for ``parsimony.bundles`` discovery.
|
|
2
|
+
|
|
3
|
+
The new bundle pipeline (``parsimony.bundles``) drives every plugin's
|
|
4
|
+
publish flow through ``CatalogDynamicSpec.plan`` — an async generator
|
|
5
|
+
that yields one :class:`~parsimony.bundles.CatalogPlan` per bundle the
|
|
6
|
+
plugin wants built. SDMX has thousands of per-dataset series bundles, so
|
|
7
|
+
the plan generator walks the on-disk flat-catalog parquet files and
|
|
8
|
+
emits one plan item per ``(agency, dataset_id)`` pair.
|
|
9
|
+
|
|
10
|
+
The on-disk root is :data:`DEFAULT_OUTPUTS_ROOT` (sibling to the package),
|
|
11
|
+
overridable via the ``PARSIMONY_SDMX_OUTPUTS_ROOT`` env var. Missing
|
|
12
|
+
agencies are silently skipped — callers running ``parsimony bundles
|
|
13
|
+
plan`` against a workspace where only one agency has been built locally
|
|
14
|
+
should see only that agency's bundles.
|
|
15
|
+
|
|
16
|
+
This module is import-cheap: it imports ``pyarrow`` lazily inside
|
|
17
|
+
:func:`plan_sdmx_series` so importing the plugin's surface (which the
|
|
18
|
+
``parsimony list-plugins`` discovery does eagerly) doesn't pay arrow's
|
|
19
|
+
cost when no one is publishing yet.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import os
|
|
25
|
+
from collections.abc import AsyncIterator
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from parsimony.bundles import CatalogPlan
|
|
29
|
+
|
|
30
|
+
from parsimony_sdmx.connectors._agencies import (
|
|
31
|
+
ALL_AGENCIES,
|
|
32
|
+
AgencyId,
|
|
33
|
+
to_namespace_token,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _outputs_root() -> Path:
    """Resolve the flat-catalog outputs root from env var or default.

    A non-empty ``PARSIMONY_SDMX_OUTPUTS_ROOT`` environment variable wins.
    A leading ``~`` / ``~user`` in that value is expanded to the matching
    home directory, because values coming from ``.env`` files, service
    units or CI configuration never pass through a shell that would expand
    it; without this, ``~/outputs`` would resolve to a literal ``~``
    directory under the current working directory.

    When the variable is unset or empty, the packaged
    ``DEFAULT_OUTPUTS_ROOT`` is returned unchanged.

    Returns:
        The directory under which per-agency output folders are looked up.
    """
    env = os.environ.get("PARSIMONY_SDMX_OUTPUTS_ROOT")
    if env:
        # expanduser() leaves paths without a leading "~" untouched.
        return Path(env).expanduser()

    # Imported lazily: the defining module pulls in pyarrow+pandas at module
    # load, which is not wanted when this module is merely being imported
    # (e.g. by the plugin discovery walk).
    from parsimony_sdmx.connectors.enumerate_datasets import DEFAULT_OUTPUTS_ROOT

    return DEFAULT_OUTPUTS_ROOT
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _series_namespace(agency: AgencyId, dataset_id: str) -> str:
    """Build the series namespace for a single ``(agency, dataset_id)`` pair.

    The result has the form ``sdmx_series_<agency token>_<dataset id>``,
    where the agency token comes from :func:`to_namespace_token` and the
    dataset id is lower-cased.

    The template is spelled out here instead of being imported from
    :mod:`parsimony_sdmx.connectors.enumerate_series`: that module's
    ``catalog=`` declaration refers to a callable defined in this module,
    so importing from it would create a circular import.
    """
    agency_token = to_namespace_token(agency)
    dataset_token = dataset_id.lower()
    return f"sdmx_series_{agency_token}_{dataset_token}"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
async def plan_sdmx_series() -> AsyncIterator[CatalogPlan]:
    """Emit a :class:`CatalogPlan` for each dataset listed on disk.

    For every agency in ``ALL_AGENCIES`` the file
    ``<root>/<agency.value>/datasets.parquet`` is consulted, where
    ``<root>`` is whatever :func:`_outputs_root` returns. Agencies whose
    file does not exist contribute nothing (a local workspace need not
    contain every agency), and a file with no rows yields no plans.

    Each yielded plan has a namespace produced by
    :func:`_series_namespace` and ``params`` shaped like::

        {"agency": "ECB", "dataset_id": "YC"}

    These keys are intended to map 1:1 onto
    :class:`~parsimony_sdmx.connectors.enumerate_series.EnumerateSeriesParams`
    so the ``parsimony bundles`` runner adapter can build the model via
    ``EnumerateSeriesParams(**plan.params)``.
    """
    # Deferred so that merely importing this module does not load pyarrow.
    import pyarrow.parquet as pq

    outputs_root = _outputs_root()
    for agency in ALL_AGENCIES:
        agency_code = agency.value
        datasets_file = outputs_root / agency_code / "datasets.parquet"
        if datasets_file.exists():
            # Only the id column is needed; leave every other column unread.
            id_column = pq.read_table(datasets_file, columns=["dataset_id"]).column("dataset_id")
            for dataset_id in id_column.to_pylist():
                namespace = _series_namespace(agency, dataset_id)
                plan_params = {"agency": agency_code, "dataset_id": dataset_id}
                yield CatalogPlan(namespace=namespace, params=plan_params)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
__all__ = [
|
|
94
|
+
"plan_sdmx_series",
|
|
95
|
+
]
|