parsimony-sdmx 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. parsimony_sdmx/__init__.py +43 -0
  2. parsimony_sdmx/_catalog_planning.py +95 -0
  3. parsimony_sdmx/_legacy_sdmx.py +794 -0
  4. parsimony_sdmx/cli/__init__.py +1 -0
  5. parsimony_sdmx/cli/args.py +126 -0
  6. parsimony_sdmx/cli/layout.py +41 -0
  7. parsimony_sdmx/cli/listing.py +172 -0
  8. parsimony_sdmx/cli/main.py +183 -0
  9. parsimony_sdmx/cli/memory_monitor.py +225 -0
  10. parsimony_sdmx/cli/orchestrator.py +345 -0
  11. parsimony_sdmx/cli/orphan_sweep.py +32 -0
  12. parsimony_sdmx/cli/summary.py +38 -0
  13. parsimony_sdmx/cli/worker.py +80 -0
  14. parsimony_sdmx/connectors/__init__.py +41 -0
  15. parsimony_sdmx/connectors/_agencies.py +44 -0
  16. parsimony_sdmx/connectors/enumerate_datasets.py +110 -0
  17. parsimony_sdmx/connectors/enumerate_series.py +128 -0
  18. parsimony_sdmx/connectors/fetch.py +166 -0
  19. parsimony_sdmx/core/__init__.py +5 -0
  20. parsimony_sdmx/core/codelists.py +50 -0
  21. parsimony_sdmx/core/errors.py +27 -0
  22. parsimony_sdmx/core/models.py +17 -0
  23. parsimony_sdmx/core/outcomes.py +47 -0
  24. parsimony_sdmx/core/projection.py +87 -0
  25. parsimony_sdmx/core/titles.py +55 -0
  26. parsimony_sdmx/io/__init__.py +1 -0
  27. parsimony_sdmx/io/html.py +38 -0
  28. parsimony_sdmx/io/http.py +198 -0
  29. parsimony_sdmx/io/parquet.py +201 -0
  30. parsimony_sdmx/io/paths.py +47 -0
  31. parsimony_sdmx/io/xml.py +116 -0
  32. parsimony_sdmx/providers/__init__.py +1 -0
  33. parsimony_sdmx/providers/agencies.py +15 -0
  34. parsimony_sdmx/providers/ecb.py +139 -0
  35. parsimony_sdmx/providers/ecb_portal.py +224 -0
  36. parsimony_sdmx/providers/ecb_series_attrs.py +81 -0
  37. parsimony_sdmx/providers/estat.py +23 -0
  38. parsimony_sdmx/providers/imf.py +23 -0
  39. parsimony_sdmx/providers/protocol.py +26 -0
  40. parsimony_sdmx/providers/registry.py +33 -0
  41. parsimony_sdmx/providers/sdmx_client.py +38 -0
  42. parsimony_sdmx/providers/sdmx_extract.py +153 -0
  43. parsimony_sdmx/providers/sdmx_flow.py +152 -0
  44. parsimony_sdmx/providers/wb.py +355 -0
  45. parsimony_sdmx-0.2.0.dist-info/METADATA +145 -0
  46. parsimony_sdmx-0.2.0.dist-info/RECORD +49 -0
  47. parsimony_sdmx-0.2.0.dist-info/WHEEL +4 -0
  48. parsimony_sdmx-0.2.0.dist-info/entry_points.txt +5 -0
  49. parsimony_sdmx-0.2.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,43 @@
1
+ """``parsimony-sdmx`` — SDMX connector plugin for the ``parsimony`` kernel.
2
+
3
+ Exports:
4
+
5
+ - :data:`CONNECTORS` — the plugin surface discovered via the
6
+ ``parsimony.providers`` entry point group. Three items: two enumerators
7
+ (dataset-level + per-dataset series) and one live fetch connector. Both
8
+ enumerators carry a ``catalog=`` declaration so ``parsimony bundles``
9
+ drives publish; SDMX no longer ships its own bundle CLI.
10
+ - :data:`ENV_VARS` — empty. SDMX endpoints are public.
11
+ - :data:`PROVIDER_METADATA` — bundle topology and supported agencies.
12
+
13
+ Discovery is driven by the kernel via entry points declared in
14
+ ``pyproject.toml``. No manual registration required — users ``pip install
15
+ parsimony-sdmx`` and the plugin appears in ``parsimony list-plugins``.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import Any
21
+
22
+ from parsimony_sdmx.connectors import CONNECTORS, ENV_VARS
23
+ from parsimony_sdmx.connectors._agencies import ALL_AGENCIES
24
+
25
# Plugin version; also echoed into PROVIDER_METADATA["plugin_version"] below.
__version__ = "0.2.0"


# Static description of this provider: the supported agencies, the bundle
# namespace templates, and the plugin version. Keys are inserted in the
# order agencies -> namespace_templates -> plugin_version.
PROVIDER_METADATA: dict[str, Any] = dict(
    agencies=[agency.value for agency in ALL_AGENCIES],
    namespace_templates=[
        "sdmx_datasets",
        "sdmx_series_{agency}_{dataset_id}",
    ],
    plugin_version=__version__,
)


# Names re-exported as the package's public surface.
__all__ = ["CONNECTORS", "ENV_VARS", "PROVIDER_METADATA", "__version__"]
@@ -0,0 +1,95 @@
1
+ """Plan generators for ``parsimony.bundles`` discovery.
2
+
3
+ The new bundle pipeline (``parsimony.bundles``) drives every plugin's
4
+ publish flow through ``CatalogDynamicSpec.plan`` — an async generator
5
+ that yields one :class:`~parsimony.bundles.CatalogPlan` per bundle the
6
+ plugin wants built. SDMX has thousands of per-dataset series bundles, so
7
+ the plan generator walks the on-disk flat-catalog parquet files and
8
+ emits one plan item per ``(agency, dataset_id)`` pair.
9
+
10
+ The on-disk root is :data:`DEFAULT_OUTPUTS_ROOT` (sibling to the package),
11
+ overridable via the ``PARSIMONY_SDMX_OUTPUTS_ROOT`` env var. Missing
12
+ agencies are silently skipped — callers running ``parsimony bundles
13
+ plan`` against a workspace where only one agency has been built locally
14
+ should see only that agency's bundles.
15
+
16
+ This module is import-cheap: it imports ``pyarrow`` lazily inside
17
+ :func:`plan_sdmx_series` so importing the plugin's surface (which the
18
+ ``parsimony list-plugins`` discovery does eagerly) doesn't pay arrow's
19
+ cost when no one is publishing yet.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import os
25
+ from collections.abc import AsyncIterator
26
+ from pathlib import Path
27
+
28
+ from parsimony.bundles import CatalogPlan
29
+
30
+ from parsimony_sdmx.connectors._agencies import (
31
+ ALL_AGENCIES,
32
+ AgencyId,
33
+ to_namespace_token,
34
+ )
35
+
36
+
37
+ def _outputs_root() -> Path:
38
+ """Resolve the flat-catalog outputs root from env var or default.
39
+
40
+ The default is imported lazily — :mod:`parsimony_sdmx.connectors.enumerate_datasets`
41
+ pulls in pyarrow+pandas at module load, which we don't want when the
42
+ plan generator is only being inspected (e.g. by the discovery walk).
43
+ """
44
+ env = os.environ.get("PARSIMONY_SDMX_OUTPUTS_ROOT")
45
+ if env:
46
+ return Path(env)
47
+ from parsimony_sdmx.connectors.enumerate_datasets import DEFAULT_OUTPUTS_ROOT
48
+
49
+ return DEFAULT_OUTPUTS_ROOT
50
+
51
+
52
def _series_namespace(agency: AgencyId, dataset_id: str) -> str:
    """Build the namespace string for one dataset's series bundle.

    The result has the form ``sdmx_series_<agency token>_<dataset id>``,
    where the agency token comes from :func:`to_namespace_token` and the
    dataset id is lower-cased.

    The template is spelled out here instead of being imported from
    :mod:`parsimony_sdmx.connectors.enumerate_series` to avoid a circular
    import — ``enumerate_series`` declares ``catalog=`` with a callable
    that lives in this module.
    """
    agency_token = to_namespace_token(agency)
    dataset_token = dataset_id.lower()
    return f"sdmx_series_{agency_token}_{dataset_token}"
61
+
62
+
63
async def plan_sdmx_series() -> AsyncIterator[CatalogPlan]:
    """Emit a :class:`CatalogPlan` for every dataset listed on disk.

    For each agency in ``ALL_AGENCIES`` the file
    ``<outputs root>/{AGENCY}/datasets.parquet`` is consulted. Agencies
    whose file does not exist are skipped without complaint — local
    workspaces don't always have every agency — and a file with no rows
    simply contributes no plans. Every ``dataset_id`` value read from the
    file yields one plan.

    Plan params shape::

        {"agency": "ECB", "dataset_id": "YC"}

    These map 1:1 to :class:`~parsimony_sdmx.connectors.enumerate_series.EnumerateSeriesParams`
    so the ``parsimony bundles`` runner adapter constructs the model
    directly via ``EnumerateSeriesParams(**plan.params)``.
    """
    # Imported here rather than at module level so that merely importing
    # this module does not load pyarrow.
    import pyarrow.parquet as pq

    outputs_dir = _outputs_root()
    for agency in ALL_AGENCIES:
        catalog_file = outputs_dir / agency.value / "datasets.parquet"
        if catalog_file.exists():
            # Only the id column is needed, so skip loading the others.
            id_column = pq.read_table(catalog_file, columns=["dataset_id"]).column(
                "dataset_id"
            )
            for ds_id in id_column.to_pylist():
                namespace = _series_namespace(agency, ds_id)
                plan_params = {"agency": agency.value, "dataset_id": ds_id}
                yield CatalogPlan(namespace=namespace, params=plan_params)
91
+
92
+
93
+ __all__ = [
94
+ "plan_sdmx_series",
95
+ ]