parsimony-bdf 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+
6
+ .Python
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+
12
+ .venv/
13
+ .env
14
+ .env.*
15
+ !.env.example
16
+
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+ .coverage
21
+ htmlcov/
22
+ coverage.xml
23
+
24
+ uv.lock
25
+
26
+ .vscode/
27
+ .council/
28
+ PLAN-*.md
29
+ .idea/
30
+ *.swp
31
+ .DS_Store
32
+
33
+ outputs/
34
+ logs/
35
+ # Recorded HTTP cassettes must never be committed — respx mocks are hand-authored
36
+ # from upstream API documentation. A pre-commit / CI regex scan is the belt; this
37
+ # ignore is the braces. Override per-file via `!` if you need a hand-authored
38
+ # fixture checked in.
39
+ packages/*/tests/fixtures/**
40
+ !packages/*/tests/fixtures/README.md
@@ -0,0 +1,57 @@
1
+ # Changelog — parsimony-bdf
2
+
3
+ All notable changes to `parsimony-bdf` will be documented in this file. The
4
+ format is based on [Keep a Changelog](https://keepachangelog.com/) and
5
+ this project adheres to [Semantic Versioning](https://semver.org/).
6
+
7
+ ## [0.8.0] — 2026-06-08
8
+
9
+ Ground-up refactor, run through the full connector guidebook process and
10
+ **live-verified** against the production Webstat API (the connector previously
11
+ shipped `UNVERIFIED-LIVE` because no key was on hand).
12
+
13
+ ### Changed
14
+
15
+ - **Enumeration switched to archetype A (live full-index export).** The Webstat
16
+ `series` dataset is a single flat queryable table, so `enumerate_bdf` now
17
+ streams the entire ~41.6k-series universe in **one** `series/exports/json`
18
+ call (plus one `webstat-datasets` call for the 45 dataflow stubs), replacing
19
+ the previous 45-call per-dataset crawl. Completeness is self-tracking and
20
+ verifiable by diffing `len(catalog)` against the live `series` total_count.
21
+ - **Bilingual, breadcrumb-rich catalog at no extra cost.** Series rows now carry
22
+ English + French titles and the `path_en`/`path_fr` topic breadcrumb folded
23
+ into the indexed `description`, improving cross-language and topical recall.
24
+ No separate enrichment pass is needed (the source already serves both
25
+ languages).
26
+ - **Package restructured** into `_http` / `outputs` / `connectors/{fetch,
27
+ enumerate,_catalog}` / `search` / `catalog_build`, mirroring the `bde`
28
+ exemplar; the top-level surface stays `CONNECTORS` + `load`.
29
+ - `bdf_fetch` validates `start_period` / `end_period` as ISO dates pre-network
30
+ (`InvalidParameterError`) and tolerates null `obs_value` (missing-status gaps).
31
+
32
+ ### Added
33
+
34
+ - Live integration suite (now runnable with `BDF_API_KEY`): keyed fetch, a
35
+ dataset-bounded live enumerate, and a fixture-catalog search.
36
+ - `catalog_tests/queries.yaml` recall gate (referenced by the catalog-validate
37
+ registry) and `tests/test_build_catalog.py` index-policy test.
38
+
39
+ ## [0.5.0] — 2026-05-06
40
+
41
+ ### Changed
42
+
43
+ - Adapted to `parsimony-core==0.5`. Connector code no longer constructs `Provenance` directly; the framework authors all provenance fields in `Connector._wrap_result`. Source-specific extras (where present) move to `Result.with_properties(**kwargs)`. Drops the `provenance=` and `params=` kwargs from `OutputConfig.build_table_result` / `Result.from_dataframe` call sites.
44
+ - Bump `parsimony-core` pin from `>=0.4.0,<0.5` to `>=0.5.0,<0.6` (and `[standard-onnx]` extra accordingly on catalog-publishing packages).
45
+ ## [0.4.0] — 2026-04-24
46
+
47
+ Part of the first coordinated release of the
48
+ [`parsimony-connectors`](https://github.com/ockham-sh/parsimony-connectors)
49
+ monorepo under `parsimony-core==0.4`.
50
+
51
+ ### Changed
52
+
53
+ - Connector rewritten against the kernel's `parsimony.discover` surface
54
+ (`iter_providers`, `load`, `load_all`) and the `@connector(env=...)`
55
+ decorator-level env-var declaration that replaced module-level
56
+ `ENV_VARS`.
57
+ - Pin bumped to `parsimony-core>=0.4,<0.5`.
@@ -0,0 +1,190 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to the Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by the Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding any notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ Copyright 2026 Ockham.sh
179
+
180
+ Licensed under the Apache License, Version 2.0 (the "License");
181
+ you may not use this file except in compliance with the License.
182
+ You may obtain a copy of the License at
183
+
184
+ http://www.apache.org/licenses/LICENSE-2.0
185
+
186
+ Unless required by applicable law or agreed to in writing, software
187
+ distributed under the License is distributed on an "AS IS" BASIS,
188
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
+ See the License for the specific language governing permissions and
190
+ limitations under the License.
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.4
2
+ Name: parsimony-bdf
3
+ Version: 0.0.1
4
+ Summary: Banque de France connector for the parsimony framework
5
+ Project-URL: Homepage, https://www.banque-france.fr
6
+ Project-URL: Repository, https://github.com/ockham-sh/parsimony-connectors
7
+ Project-URL: Issues, https://github.com/ockham-sh/parsimony-connectors/issues
8
+ Author-email: "Ockham.sh" <team@ockham.sh>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: bdf,connectors,data,finance,parsimony
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Financial and Insurance Industry
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Office/Business :: Financial
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.11
24
+ Requires-Dist: pandas<3,>=2.3.0
25
+ Requires-Dist: parsimony-core[catalog]>=0.0.1
26
+ Requires-Dist: parsimony-shared>=0.0.1
27
+ Requires-Dist: pydantic<3,>=2.11.1
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy>=1.10; extra == 'dev'
30
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
31
+ Requires-Dist: pytest>=9.0.3; extra == 'dev'
32
+ Requires-Dist: respx>=0.22.0; extra == 'dev'
33
+ Requires-Dist: ruff>=0.15.10; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # parsimony-bdf
37
+
38
+ Banque de France connector — French macroeconomic, monetary, and financial time
39
+ series via the Webstat (Opendatasoft) API.
40
+
41
+ Part of the [parsimony-connectors](https://github.com/ockham-sh/parsimony-connectors) monorepo. Distributed standalone on PyPI as `parsimony-bdf`.
42
+
43
+ ## Connectors
44
+
45
+ | Name | Kind | Description |
46
+ |---|---|---|
47
+ | `bdf_fetch` | connector | Fetch a Banque de France time series by SDMX key (e.g. `EXR.M.USD.EUR.SP00.E`), optionally bounded by `start_period`/`end_period`. |
48
+ | `enumerate_bdf` | enumerator | Stream the full BdF series universe (~41.6k series across 45 dataflows) for catalog discovery. |
49
+ | `bdf_search` | connector | Semantic-search the published BdF catalog snapshot; returns ranked series codes. |
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install parsimony-bdf
55
+ ```
56
+
57
+ Pulls in `parsimony-core[catalog]` and `parsimony-shared` automatically. Verify discovery:
58
+
59
+ ```bash
60
+ python -c "from parsimony import discover; print([p.name for p in discover.iter_providers()])"
61
+ ```
62
+
63
+ ## Configuration
64
+
65
+ The Webstat API requires a free API key. Register at
66
+ https://developer.webstat.banque-france.fr/, then set:
67
+
68
+ ```bash
69
+ export BDF_API_KEY="<your-key>"
70
+ ```
71
+
72
+ The key is sent in the `Authorization: Apikey <KEY>` header (the literal word
73
+ `Apikey`, **not** `Bearer`). It is declared as a secret (stripped from
74
+ provenance) and never appears in request logs. Supply it via the env var above,
75
+ or bind it explicitly:
76
+
77
+ ```python
78
+ from parsimony_bdf import load
79
+ connectors = load(api_key="<your-key>") # binds the key across the bundle
80
+ ```
81
+
82
+ A missing key fails fast with `UnauthorizedError` naming `BDF_API_KEY`.
83
+
84
+ `bdf_search` reads a published catalog snapshot (default `hf://parsimony-dev/bdf`).
85
+ Override the snapshot location with the `PARSIMONY_BDF_CATALOG_URL` environment
86
+ variable, or pass `catalog_url=` at call time.
87
+
88
+ ## Quick start
89
+
90
+ ```python
91
+ from parsimony_bdf import load
92
+
93
+ connectors = load(api_key="<your-key>")
94
+ result = connectors["bdf_fetch"](key="EXR.M.USD.EUR.SP00.E")
95
+ print(result.data.head())
96
+ ```
97
+
98
+ For multi-plugin composition (autoloads everything installed):
99
+
100
+ ```python
101
+ from parsimony import discover
102
+ connectors = discover.load_all()
103
+ ```
104
+
105
+ ## Catalogs
106
+
107
+ The Webstat `series` dataset is a single flat queryable table, so `enumerate_bdf`
108
+ discovers the **entire** universe in two requests: one `series/exports/json`
109
+ export (~41.6k series) plus one `webstat-datasets` call for the 45 dataflow stub
110
+ rows. Each series row carries English + French titles and a topic breadcrumb,
111
+ folded into the catalog `description` for cross-language recall. Maintainers
112
+ build a `Catalog` snapshot from it (`scripts/build_catalog.py`) and push it to
113
+ the snapshot URL `bdf_search` reads — the build runs offline as a publish job,
114
+ never at query time. Quota: 10,000 requests/day.
115
+
116
+ ## Provider
117
+
118
+ - Homepage: https://www.banque-france.fr
119
+ - Webstat portal: https://webstat.banque-france.fr
120
+ - Developer portal: https://developer.webstat.banque-france.fr/
121
+
122
+ ## License
123
+
124
+ See [LICENSE](./LICENSE).
@@ -0,0 +1,89 @@
1
+ # parsimony-bdf
2
+
3
+ Banque de France connector — French macroeconomic, monetary, and financial time
4
+ series via the Webstat (Opendatasoft) API.
5
+
6
+ Part of the [parsimony-connectors](https://github.com/ockham-sh/parsimony-connectors) monorepo. Distributed standalone on PyPI as `parsimony-bdf`.
7
+
8
+ ## Connectors
9
+
10
+ | Name | Kind | Description |
11
+ |---|---|---|
12
+ | `bdf_fetch` | connector | Fetch a Banque de France time series by SDMX key (e.g. `EXR.M.USD.EUR.SP00.E`), optionally bounded by `start_period`/`end_period`. |
13
+ | `enumerate_bdf` | enumerator | Stream the full BdF series universe (~41.6k series across 45 dataflows) for catalog discovery. |
14
+ | `bdf_search` | connector | Semantic-search the published BdF catalog snapshot; returns ranked series codes. |
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install parsimony-bdf
20
+ ```
21
+
22
+ Pulls in `parsimony-core[catalog]` and `parsimony-shared` automatically. Verify discovery:
23
+
24
+ ```bash
25
+ python -c "from parsimony import discover; print([p.name for p in discover.iter_providers()])"
26
+ ```
27
+
28
+ ## Configuration
29
+
30
+ The Webstat API requires a free API key. Register at
31
+ https://developer.webstat.banque-france.fr/, then set:
32
+
33
+ ```bash
34
+ export BDF_API_KEY="<your-key>"
35
+ ```
36
+
37
+ The key is sent in the `Authorization: Apikey <KEY>` header (the literal word
38
+ `Apikey`, **not** `Bearer`). It is declared as a secret (stripped from
39
+ provenance) and never appears in request logs. Supply it via the env var above,
40
+ or bind it explicitly:
41
+
42
+ ```python
43
+ from parsimony_bdf import load
44
+ connectors = load(api_key="<your-key>") # binds the key across the bundle
45
+ ```
46
+
47
+ A missing key fails fast with `UnauthorizedError` naming `BDF_API_KEY`.
48
+
49
+ `bdf_search` reads a published catalog snapshot (default `hf://parsimony-dev/bdf`).
50
+ Override the snapshot location with the `PARSIMONY_BDF_CATALOG_URL` environment
51
+ variable, or pass `catalog_url=` at call time.
52
+
53
+ ## Quick start
54
+
55
+ ```python
56
+ from parsimony_bdf import load
57
+
58
+ connectors = load(api_key="<your-key>")
59
+ result = connectors["bdf_fetch"](key="EXR.M.USD.EUR.SP00.E")
60
+ print(result.data.head())
61
+ ```
62
+
63
+ For multi-plugin composition (autoloads everything installed):
64
+
65
+ ```python
66
+ from parsimony import discover
67
+ connectors = discover.load_all()
68
+ ```
69
+
70
+ ## Catalogs
71
+
72
+ The Webstat `series` dataset is a single flat queryable table, so `enumerate_bdf`
73
+ discovers the **entire** universe in two requests: one `series/exports/json`
74
+ export (~41.6k series) plus one `webstat-datasets` call for the 45 dataflow stub
75
+ rows. Each series row carries English + French titles and a topic breadcrumb,
76
+ folded into the catalog `description` for cross-language recall. Maintainers
77
+ build a `Catalog` snapshot from it (`scripts/build_catalog.py`) and push it to
78
+ the snapshot URL `bdf_search` reads — the build runs offline as a publish job,
79
+ never at query time. Quota: 10,000 requests/day.
80
+
81
+ ## Provider
82
+
83
+ - Homepage: https://www.banque-france.fr
84
+ - Webstat portal: https://webstat.banque-france.fr
85
+ - Developer portal: https://developer.webstat.banque-france.fr/
86
+
87
+ ## License
88
+
89
+ See [LICENSE](./LICENSE).
@@ -0,0 +1,7 @@
1
+ """Banque de France (BdF): fetch + catalog enumeration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from parsimony_bdf.connectors import CONNECTORS, load
6
+
7
+ __all__ = ["CONNECTORS", "load"]
@@ -0,0 +1,112 @@
1
+ """Banque de France (BdF) Webstat transport: constants, auth, HTTP client.
2
+
3
+ BdF publishes through an Opendatasoft Explore v2.1 API. A single Opendatasoft
4
+ "catalog" hosts several system datasets; three of them carry everything this
5
+ connector needs (all live-verified 2026-06-08):
6
+
7
+ * ``webstat-datasets`` — 45 records, one per BdF dataflow (``EXR``, ``BSI``, …),
8
+ each with bilingual ``name``/``description`` and a ``series_count``.
9
+ * ``series`` — the full series catalogue as one flat table (~41.6k records),
10
+ each row carrying ``series_key``, ``dataset_id``, bilingual titles, the
11
+ ``path_en``/``path_fr`` breadcrumb, frequency, reference area and source
12
+ agency. Because it is a single queryable table, the *entire* universe streams
13
+ from one ``/exports/json`` call — no per-dataset fan-out needed.
14
+ * ``observations`` — observation rows, filtered by ``series_key`` for a fetch.
15
+
16
+ Auth is an Opendatasoft API key sent in the ``Authorization: Apikey <KEY>``
17
+ header (the literal word ``Apikey`` — *not* ``Bearer``; the wrong scheme returns
18
+ a silent 401). The key rides the header, never a query param, so it never lands
19
+ in a request log. Register at https://developer.webstat.banque-france.fr/ and
20
+ export it as ``BDF_API_KEY``. Quota: 10,000 requests/day.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import os
26
+
27
+ import httpx
28
+ from parsimony.errors import UnauthorizedError
29
+ from parsimony.transport import HttpClient
30
+ from parsimony.transport.helpers import make_http_client
31
+ from parsimony_shared.cb_enumerate import MetadataCrawlConfig
32
+
33
+ PROVIDER = "bdf"
34
+ ENV_VAR = "BDF_API_KEY"
35
+ USER_AGENT = "parsimony-bdf/0.8"
36
+
37
+ BASE_URL = "https://webstat.banque-france.fr/api/explore/v2.1/catalog/datasets"
38
+
39
+ # Opendatasoft export endpoints (relative to BASE_URL). ``/exports/json`` streams
40
+ # the whole (optionally filtered) dataset in one response — no pagination.
41
+ DATASETS_PATH = "webstat-datasets/exports/json"
42
+ SERIES_PATH = "series/exports/json"
43
+ OBSERVATIONS_PATH = "observations/exports/json"
44
+
45
+ # Lean column projections keep the streamed payloads small. The ``series`` table
46
+ # is ~200 columns wide (a sparse SDMX flat table) — selecting only what the
47
+ # catalog needs turns a multi-hundred-MB export into a few MB.
48
+ DATASETS_SELECT = "dataset_id,name_en,name_fr,description_en,description_fr,series_count"
49
+ SERIES_SELECT = (
50
+ "series_key,dataset_id,title_en,title_fr,title_long_en,title_long_fr,"
51
+ "freq,ref_area,source_agency,first_time_period_date,last_time_period_date,"
52
+ "path_en,path_fr"
53
+ )
54
+ OBSERVATIONS_SELECT = "series_key,title_en,title_fr,time_period_start,obs_value"
55
+
56
+ # The enumerator issues only two requests (datasets + the full series export),
57
+ # so concurrency is almost moot; a small cap with a courtesy delay is plenty.
58
+ METADATA_CRAWL = MetadataCrawlConfig(inter_request_delay_s=0.25)
59
+
60
+ # The full ``series`` export is large; allow a long read. Used by the enumerator.
61
+ CRAWL_TIMEOUT = httpx.Timeout(connect=30.0, read=180.0, write=30.0, pool=30.0)
62
+
63
+
64
def resolve_key(api_key: str) -> str:
    """Resolve the API key (arg → ``BDF_API_KEY`` env fallback); fast-fail if absent.

    The explicit ``api_key`` wins whenever it holds anything besides whitespace.
    Otherwise the environment variable named by ``ENV_VAR`` is consulted. Each
    candidate is stripped *before* the fallback decision, so a blank or
    whitespace-only argument does not mask a key that is set in the environment.

    Returns the resolved key with surrounding whitespace removed.

    Raises :class:`UnauthorizedError` when neither source yields a non-blank
    key. The error is constructed with ``env_var=ENV_VAR`` (passed by keyword)
    so it names the variable to set, and it is raised here, without this
    function performing any network I/O.
    """
    explicit = (api_key or "").strip()
    if explicit:
        return explicit
    # Only reached when the argument is empty/blank: fall back to the environment.
    from_env = os.environ.get(ENV_VAR, "").strip()
    if not from_env:
        raise UnauthorizedError(PROVIDER, env_var=ENV_VAR)
    return from_env
75
+
76
+
77
def auth_headers(key: str) -> dict[str, str]:
    """Assemble the Webstat request headers for an already-resolved key.

    The ``Authorization`` value uses the literal scheme word ``Apikey`` rather
    than ``Bearer``. Alongside it the mapping carries a JSON ``Accept`` header
    and the module-level ``USER_AGENT`` string.
    """
    headers: dict[str, str] = {}
    headers["Authorization"] = f"Apikey {key}"
    headers["Accept"] = "application/json"
    headers["User-Agent"] = USER_AGENT
    return headers
88
+
89
+
90
def make_fetch_client(api_key: str) -> HttpClient:
    """Build the HTTP client used by ``bdf_fetch`` for the given key.

    The key is resolved first via :func:`resolve_key`, so a missing key raises
    before any client is constructed. The client is then created against
    ``BASE_URL`` with the auth headers and a 60-second timeout.
    """
    headers = auth_headers(resolve_key(api_key))
    return make_http_client(BASE_URL, headers=headers, timeout=60.0)
94
+
95
+
96
+ __all__ = [
97
+ "BASE_URL",
98
+ "CRAWL_TIMEOUT",
99
+ "DATASETS_PATH",
100
+ "DATASETS_SELECT",
101
+ "ENV_VAR",
102
+ "METADATA_CRAWL",
103
+ "OBSERVATIONS_PATH",
104
+ "OBSERVATIONS_SELECT",
105
+ "PROVIDER",
106
+ "SERIES_PATH",
107
+ "SERIES_SELECT",
108
+ "USER_AGENT",
109
+ "auth_headers",
110
+ "make_fetch_client",
111
+ "resolve_key",
112
+ ]
@@ -0,0 +1,37 @@
1
+ """Build the Banque de France catalog snapshot.
2
+
3
+ Maintainer tooling, not part of the plugin contract: ``enumerate_bdf`` streams
4
+ the full ``series`` universe, the rows become catalog entities, and the catalog
5
+ is indexed and built. Titles are already bilingual at the source (English short
6
+ title with a French / breadcrumb fallback in ``description``), so — unlike BdE —
7
+ no separate enrichment pass is needed.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from parsimony.catalog import Catalog
13
+ from parsimony.catalog.policy import discovery_indexes
14
+ from parsimony.catalog.source import entities_from_raw
15
+
16
+ from parsimony_bdf.connectors.enumerate import enumerate_bdf
17
+ from parsimony_bdf.outputs import BDF_ENUMERATE_OUTPUT
18
+
19
+ CATALOG_NAMESPACE = "bdf"
20
+
21
+
22
def build_bdf_catalog(*, api_key: str | None = None) -> Catalog:
    """Enumerate the BdF universe and return a built, searchable catalog.

    ``api_key`` is optional: ``None`` is normalised to an empty string and the
    value is stripped before being handed to ``enumerate_bdf``, which is where
    key resolution (and any failure for a missing key) takes place.

    The enumerated rows are converted to catalog entities using
    ``BDF_ENUMERATE_OUTPUT``, indexed via ``discovery_indexes`` with ``title``
    as the default field, and the catalog is built before being returned.
    """
    normalized_key = (api_key or "").strip()
    enumerated = enumerate_bdf(api_key=normalized_key)
    entities = entities_from_raw(enumerated, BDF_ENUMERATE_OUTPUT)
    indexes = discovery_indexes(entities)
    snapshot = Catalog(CATALOG_NAMESPACE, indexes=indexes, default_field="title")
    snapshot.set_entities(entities)
    snapshot.build()
    return snapshot
35
+
36
+
37
+ __all__ = ["CATALOG_NAMESPACE", "build_bdf_catalog"]
@@ -0,0 +1,19 @@
1
+ """bdf connector registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from parsimony.connector import Connectors
6
+
7
+ from parsimony_bdf.connectors.enumerate import enumerate_bdf
8
+ from parsimony_bdf.connectors.fetch import bdf_fetch
9
+ from parsimony_bdf.search import bdf_search
10
+
11
+ CONNECTORS = Connectors([bdf_fetch, enumerate_bdf, bdf_search])
12
+
13
+
14
def load(*, api_key: str) -> Connectors:
    """Bind ``api_key`` onto :data:`CONNECTORS` and return the bound collection.

    Delegates to ``CONNECTORS.bind``; ``api_key`` must be passed by keyword.
    """
    bound = CONNECTORS.bind(api_key=api_key)
    return bound
17
+
18
+
19
+ __all__ = ["CONNECTORS", "load"]
@@ -0,0 +1,184 @@
1
+ """Pure (network-free) helpers that turn raw Webstat rows into catalog rows.
2
+
3
+ Kept separate from the enumerator so the row-shaping logic is unit-testable
4
+ without any HTTP. Two builders — one per entity kind — plus the dedup/assembly
5
+ entry point :func:`build_enumerate_rows`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ from parsimony_bdf.outputs import ENUMERATE_COLUMNS
13
+
14
+ # A blank row with every declared column present — @enumerator requires an
15
+ # EXACT column match, so every emitted row must carry all of them.
16
+ _BLANK: dict[str, str] = {name: "" for name in ENUMERATE_COLUMNS}
17
+
18
+
19
+ def _clean(value: Any) -> str:
20
+ """Coerce a raw field to a stripped string (``None`` → empty)."""
21
+ if value is None:
22
+ return ""
23
+ return str(value).strip()
24
+
25
+
26
def _as_path(raw: Any) -> str:
    """Collapse a Webstat ``path_*`` value into one breadcrumb string.

    ``raw`` may be a list/tuple of segments (the export returns ``path_en`` as
    a JSON array, e.g. ``['Rates and prices/Market interest rates']``) or a
    single bare value. Each segment is cleaned with :func:`_clean`; empty
    segments are dropped and repeats keep only their first occurrence. The
    survivors are joined with `` | ``. ``None`` yields ``""``.
    """
    if raw is None:
        return ""
    segments = raw if isinstance(raw, (list, tuple)) else [raw]
    cleaned = (_clean(segment) for segment in segments)
    # dict keys preserve insertion order, giving a first-seen-wins dedup.
    unique = dict.fromkeys(text for text in cleaned if text)
    return " | ".join(unique)
42
+
43
+
44
+ def _first_nonempty(*candidates: str) -> str:
45
+ for candidate in candidates:
46
+ if candidate:
47
+ return candidate
48
+ return ""
49
+
50
+
51
+ def _dataset_description(*, name_en: str, name_fr: str, desc_en: str, desc_fr: str) -> str:
52
+ """Bilingual description for a dataset stub (deduping identical halves)."""
53
+ parts: list[str] = []
54
+ for value in (name_en, name_fr, desc_en, desc_fr):
55
+ if value and value.lower() not in {p.lower() for p in parts}:
56
+ parts.append(value)
57
+ return " | ".join(parts)
58
+
59
+
60
def dataset_stub_row(dataset: dict[str, Any]) -> dict[str, str] | None:
    """Build the synthetic ``dataset:{id}`` parent row for one dataset record.

    Returns ``None`` when the record has no usable ``dataset_id``. Otherwise
    returns a row carrying every blank column from ``_BLANK`` with ``code``,
    ``title``, ``description``, ``entity_type`` and ``dataset_id`` filled in.
    The title prefers the English name, then the French name, then the id.
    """
    dataset_id = _clean(dataset.get("dataset_id"))
    if not dataset_id:
        return None

    name_en, name_fr, desc_en, desc_fr = (
        _clean(dataset.get(field)) for field in ("name_en", "name_fr", "description_en", "description_fr")
    )

    # Start from the blank template so every declared column is present.
    row = dict(_BLANK)
    row["code"] = f"dataset:{dataset_id}"
    row["title"] = _first_nonempty(name_en, name_fr, dataset_id)
    row["description"] = _dataset_description(
        name_en=name_en,
        name_fr=name_fr,
        desc_en=desc_en,
        desc_fr=desc_fr,
    )
    row["entity_type"] = "dataset"
    row["dataset_id"] = dataset_id
    return row
77
+
78
+
79
def _series_description(
    *,
    title_en: str,
    title_fr: str,
    long_en: str,
    long_fr: str,
    path: str,
    dataset_id: str,
    dataset_name: str,
    source_agency: str,
) -> str:
    """Compose the searchable description for one series.

    The result joins, with `` | ``, whichever of these are non-empty, in order:

    * the English text (long title, else short title);
    * the French text (long title, else short title), unless it equals the
      English text case-insensitively;
    * the breadcrumb ``path``;
    * ``Dataset: <name or id>.``;
    * ``Source: <source_agency>.``.

    Both languages plus the hierarchy path land in ``description`` (which the
    discovery index covers), so a search in English or French — or by topic
    breadcrumb — gets a lexical hit even though ``title`` is single-language.
    """
    english = _first_nonempty(long_en, title_en)
    french = _first_nonempty(long_fr, title_fr)
    french_is_duplicate = french.lower() == english.lower()
    dataset_context = dataset_name or dataset_id

    segments = [
        english,
        "" if french_is_duplicate else french,
        path,
        f"Dataset: {dataset_context}." if dataset_context else "",
        f"Source: {source_agency}." if source_agency else "",
    ]
    return " | ".join(segment for segment in segments if segment)
111
+
112
+
113
def series_row(series: dict[str, Any], dataset_names: dict[str, str]) -> dict[str, str] | None:
    """Build the catalog row for one raw series record.

    ``dataset_names`` maps ``dataset_id`` to a display name and is used only to
    enrich the description. Returns ``None`` when the record has no usable
    ``series_key``; otherwise returns a row with every ``_BLANK`` column
    present and the series fields filled in. The breadcrumb prefers
    ``path_en`` and falls back to ``path_fr``.
    """

    def field(name: str) -> str:
        return _clean(series.get(name))

    series_key = field("series_key")
    if not series_key:
        return None

    dataset_id = field("dataset_id")
    title_en = field("title_en")
    title_fr = field("title_fr")
    long_en = field("title_long_en")
    long_fr = field("title_long_fr")
    path = _as_path(series.get("path_en")) or _as_path(series.get("path_fr"))
    source_agency = field("source_agency")

    description = _series_description(
        title_en=title_en,
        title_fr=title_fr,
        long_en=long_en,
        long_fr=long_fr,
        path=path,
        dataset_id=dataset_id,
        dataset_name=dataset_names.get(dataset_id, ""),
        source_agency=source_agency,
    )

    # Start from the blank template so every declared column is present.
    row = dict(_BLANK)
    row["code"] = series_key
    row["title"] = _first_nonempty(title_en, title_fr, long_en, long_fr, series_key)
    row["description"] = description
    row["entity_type"] = "series"
    row["dataset_id"] = dataset_id
    row["frequency"] = field("freq")
    row["ref_area"] = field("ref_area")
    row["source_agency"] = source_agency
    row["path"] = path
    row["first_time_period"] = field("first_time_period_date")
    row["last_time_period"] = field("last_time_period_date")
    return row
148
+
149
+
150
def build_enumerate_rows(
    datasets: list[dict[str, Any]],
    series: list[dict[str, Any]],
) -> list[dict[str, str]]:
    """Assemble catalog rows: dataset stubs first, then series, deduped by code.

    Records that yield no row (missing id / key) are skipped. When two rows
    share a ``code`` the first one wins, so a dataset stub beats a series
    whose key happens to collide with it, and repeated series in the feed
    collapse to their first occurrence.
    """
    # dataset_id -> display name (EN preferred, FR fallback), for series context.
    dataset_names: dict[str, str] = {}
    for record in datasets:
        dataset_id = _clean(record.get("dataset_id"))
        if dataset_id:
            dataset_names[dataset_id] = _first_nonempty(
                _clean(record.get("name_en")),
                _clean(record.get("name_fr")),
            )

    def candidate_rows():
        for record in datasets:
            yield dataset_stub_row(record)
        for record in series:
            yield series_row(record, dataset_names)

    # dict insertion order keeps first-seen order; setdefault keeps the first row.
    rows_by_code: dict[str, dict[str, str]] = {}
    for candidate in candidate_rows():
        if candidate is not None:
            rows_by_code.setdefault(candidate["code"], candidate)
    return list(rows_by_code.values())
182
+
183
+
184
+ __all__ = ["build_enumerate_rows", "dataset_stub_row", "series_row"]
@@ -0,0 +1,114 @@
1
+ """BdF catalog enumeration connector (archetype A: live full-index export).
2
+
3
+ The Webstat ``series`` dataset is a single flat queryable table holding the
4
+ *entire* BdF universe (~41.6k series), so one ``/series/exports/json`` call with
5
+ a lean column projection streams every addressable unit — no per-dataset
6
+ fan-out. A second call to ``webstat-datasets`` supplies the 45 dataflow stubs
7
+ and the dataset names used as series context. Two requests, fully self-tracking:
8
+ completeness is verifiable by diffing ``len(catalog)`` against the live
9
+ ``series`` ``total_count``.
10
+
11
+ The two universe sources are exposed as module-level seams (:func:`_list_datasets`,
12
+ :func:`_list_all_series`) so tests can monkeypatch them to a tiny slice and bound
13
+ the crawl — never pulling the full table offline.
14
+
15
+ Best-effort: a failed source is logged and skipped so a partial catalog still
16
+ builds; the publish job checks ``len(df) == 0`` separately. The returned frame
17
+ matches ``ENUMERATE_COLUMNS`` exactly, as the ``@enumerator`` contract requires.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ from typing import Any
24
+
25
+ import httpx
26
+ import pandas as pd
27
+ from parsimony.connector import enumerator
28
+ from parsimony_shared.cb_enumerate import ThrottledJsonFetcher
29
+
30
+ from parsimony_bdf._http import (
31
+ BASE_URL,
32
+ CRAWL_TIMEOUT,
33
+ DATASETS_PATH,
34
+ DATASETS_SELECT,
35
+ METADATA_CRAWL,
36
+ SERIES_PATH,
37
+ SERIES_SELECT,
38
+ auth_headers,
39
+ resolve_key,
40
+ )
41
+ from parsimony_bdf.connectors._catalog import build_enumerate_rows
42
+ from parsimony_bdf.outputs import BDF_ENUMERATE_OUTPUT, ENUMERATE_COLUMNS
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
def _list_datasets(fetcher: ThrottledJsonFetcher) -> list[dict[str, Any]]:
    """Fetch the BdF dataflow stubs from the ``webstat-datasets`` export.

    Requests the ``DATASETS_SELECT`` projection ordered by ``dataset_id`` and
    keeps only the dict entries of the response. Any non-list payload
    (including ``None``) yields an empty list.

    A crawl seam: tests monkeypatch this to a small slice to bound the fan-out.
    """
    query = {"select": DATASETS_SELECT, "order_by": "dataset_id"}
    payload = fetcher.get_json(f"{BASE_URL}/{DATASETS_PATH}", params=query, label="datasets")
    if isinstance(payload, list):
        return [entry for entry in payload if isinstance(entry, dict)]
    return []
59
+
60
+
61
def _list_all_series(fetcher: ThrottledJsonFetcher) -> list[dict[str, Any]] | None:
    """Fetch every series row from the flat ``series`` export in one call.

    Returns ``None`` when the fetcher yields ``None``, so the caller can tell
    a failed export apart from an empty one and still emit the dataset stubs.
    A non-list payload yields an empty list; otherwise only the dict entries
    are kept.

    A crawl seam: the live integration test monkeypatches this to a single
    small dataset so it verifies the real export shape without streaming the
    whole table.
    """
    payload = fetcher.get_json(
        f"{BASE_URL}/{SERIES_PATH}",
        params={"select": SERIES_SELECT},
        label="series",
    )
    if payload is None:
        return None
    entries = payload if isinstance(payload, list) else []
    return [entry for entry in entries if isinstance(entry, dict)]
76
+
77
+
78
@enumerator(output=BDF_ENUMERATE_OUTPUT, tags=["macro", "fr"], secrets=("api_key",))
def enumerate_bdf(*, api_key: str = "") -> pd.DataFrame:
    """Enumerate every Banque de France series with parent-dataset context.

    Streams the full ``series`` table plus the 45 dataflow stubs (two requests),
    emitting one row per series (KEY = ``series_key``) and one ``dataset:{id}``
    stub per dataflow, with bilingual descriptions and breadcrumb paths.
    """
    resolved_key = resolve_key(api_key)

    # Both universe sources share one client, and therefore one throttled fetcher.
    with httpx.Client(
        timeout=CRAWL_TIMEOUT,
        headers=auth_headers(resolved_key),
        follow_redirects=True,
    ) as client:
        fetcher = ThrottledJsonFetcher(client, provider="bdf", config=METADATA_CRAWL, logger=logger)
        datasets = _list_datasets(fetcher)
        exported = _list_all_series(fetcher)

    # Best-effort: a missing source is logged and skipped, not raised.
    if not datasets:
        logger.warning("BdF enumerate: dataset list unavailable; emitting series only")
    series: list[dict[str, Any]]
    if exported is None:
        logger.warning("BdF enumerate: series export failed; emitting dataset stubs only")
        series = []
    else:
        series = exported

    rows = build_enumerate_rows(datasets, series)
    series_count = sum(row["entity_type"] == "series" for row in rows)
    logger.info(
        "BdF enumerate: %d datasets, %d series, %d total catalog rows",
        len(datasets),
        series_count,
        len(rows),
    )

    # Explicit column list keeps the frame aligned with ENUMERATE_COLUMNS even when empty.
    return pd.DataFrame(rows, columns=list(ENUMERATE_COLUMNS))
112
+
113
+
114
+ __all__ = ["enumerate_bdf"]
@@ -0,0 +1,119 @@
1
+ """BdF series fetch connector (Webstat ``observations`` export).
2
+
3
+ Pulls observation rows for a single SDMX ``series_key`` from the Opendatasoft
4
+ ``observations`` dataset, filtered server-side with an ODSQL ``where`` clause.
5
+ ``obs_value`` is null on missing-status rows (BdF marks gaps with ``OBS_STATUS=M``),
6
+ so values may legitimately be ``None`` between real observations.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Annotated, Any
12
+
13
+ import pandas as pd
14
+ from parsimony.connector import connector
15
+ from parsimony.errors import EmptyDataError, InvalidParameterError, ParseError
16
+ from parsimony.transport.helpers import fetch_json
17
+
18
+ from parsimony_bdf._http import OBSERVATIONS_PATH, OBSERVATIONS_SELECT, make_fetch_client
19
+ from parsimony_bdf.outputs import BDF_FETCH_OUTPUT, FETCH_COLUMNS
20
+
21
+
22
+ def _validate_period(value: str | None, name: str) -> str | None:
23
+ """Screen a ``YYYY-MM-DD`` period bound before it enters the ODSQL clause.
24
+
25
+ Returns the validated string, or ``None`` when unset. A malformed value is
26
+ rejected pre-network as :class:`InvalidParameterError` rather than being
27
+ embedded in ``date'…'`` for the server to reject with a generic 400.
28
+ """
29
+ if value is None:
30
+ return None
31
+ text = value.strip()
32
+ if not text:
33
+ return None
34
+ parts = text.split("-")
35
+ if len(parts) == 3 and parts[0].isdigit() and len(parts[0]) == 4 and all(p.isdigit() for p in parts[1:]):
36
+ return text
37
+ raise InvalidParameterError("bdf", f"{name} must be an ISO date 'YYYY-MM-DD' (got {value!r})")
38
+
39
+
40
+ def _build_where(series_key: str, start: str | None, end: str | None) -> str:
41
+ where = f'series_key="{series_key}"'
42
+ if start:
43
+ where += f" and time_period_start>=date'{start}'"
44
+ if end:
45
+ where += f" and time_period_start<=date'{end}'"
46
+ return where
47
+
48
+
49
def _parse_observations(payload: Any, series_key: str) -> pd.DataFrame:
    """Reshape the flat observations array into ``key,title,date,value`` rows.

    Non-dict entries and entries without a ``time_period_start`` are skipped.
    ``obs_value`` is converted with ``float``; a ``None`` value, or one that
    raises ``ValueError`` / ``TypeError`` on conversion, becomes ``None``.
    ``key`` and ``title`` fall back to the requested ``series_key`` when the
    row does not supply them.

    Raises:
        ParseError: If ``payload`` is not a list.
        EmptyDataError: If ``payload`` is empty or no entry yields a row.
    """
    if not isinstance(payload, list):
        raise ParseError("bdf", f"unexpected response shape for key: {series_key}")
    if not payload:
        raise EmptyDataError("bdf", query_params={"key": series_key})

    records: list[dict[str, Any]] = []
    for entry in payload:
        if not isinstance(entry, dict):
            continue
        stamp = str(entry.get("time_period_start") or "").strip()
        if not stamp:
            continue

        observed = entry.get("obs_value")
        number: float | None
        if observed is None:
            number = None
        else:
            try:
                number = float(observed)
            except (ValueError, TypeError):
                number = None

        label = entry.get("title_en") or entry.get("title_fr") or entry.get("series_key") or series_key
        records.append(
            {
                "key": str(entry.get("series_key") or series_key),
                "title": str(label),
                "date": stamp,
                "value": number,
            }
        )

    if not records:
        raise EmptyDataError("bdf", query_params={"key": series_key})
    return pd.DataFrame(records, columns=list(FETCH_COLUMNS))
81
+
82
+
83
@connector(output=BDF_FETCH_OUTPUT, tags=["macro", "fr"], secrets=("api_key",))
def bdf_fetch(
    key: Annotated[str, "ns:bdf"],
    start_period: str | None = None,
    end_period: str | None = None,
    api_key: str = "",
) -> pd.DataFrame:
    """Fetch Banque de France time series via the Webstat Opendatasoft API.

    Pulls observation rows for a single dot-separated SDMX series ``key`` (e.g.
    ``EXR.M.USD.EUR.SP00.E``) and returns ``(key, title, date, value)`` rows
    ordered by date. Optional ``start_period`` / ``end_period`` (``YYYY-MM-DD``)
    bound the result on ``time_period_start``. Discover keys with ``bdf_search``
    or ``enumerate_bdf``. Missing-status gaps come back with a null ``value``.
    """
    # Validate every input before any client is created or request is sent.
    series_key = key.strip()
    if series_key == "":
        raise InvalidParameterError("bdf", "key must be non-empty")
    lower_bound = _validate_period(start_period, "start_period")
    upper_bound = _validate_period(end_period, "end_period")

    http = make_fetch_client(api_key)
    query = {
        "select": OBSERVATIONS_SELECT,
        "where": _build_where(series_key, lower_bound, upper_bound),
        "order_by": "time_period_start",
    }
    payload = fetch_json(
        http,
        path=OBSERVATIONS_PATH,
        params=query,
        provider="bdf",
        op_name="observations",
    )
    return _parse_observations(payload, series_key)
117
+
118
+
119
+ __all__ = ["bdf_fetch"]
@@ -0,0 +1,53 @@
1
+ """BdF connector output schemas."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from parsimony.result import Column, ColumnRole, OutputConfig
6
+
7
+ # The enumerator emits two entity kinds, distinguished by ``entity_type``:
8
+ #
9
+ # * ``series`` — KEY is the bare ``series_key`` (already globally unique, e.g.
10
+ # ``EXR.M.USD.EUR.SP00.E`` — the dataflow prefix makes it unique, so no
11
+ # compound code is needed).
12
+ # * ``dataset`` — a synthetic parent stub keyed ``dataset:{dataset_id}`` (45 of
13
+ # these), mirroring BoJ's ``db:`` / BdP's ``dataset:`` so a consumer can split
14
+ # entity kinds by KEY prefix alone (or by the ``entity_type`` column).
15
+ #
16
+ # ``title`` is the English short title (French / long / key fallback); the
17
+ # bilingual long titles plus the breadcrumb ``path`` and dataset context are
18
+ # folded into ``description`` so the discovery index (which also covers
19
+ # ``description``) gives recall in both languages.
20
# Metadata columns of the enumerator output, in output order.
# ``entity_type`` holds "dataset" | "series"; ``path`` is the EN breadcrumb hierarchy.
_ENUMERATE_METADATA_FIELDS: tuple[str, ...] = (
    "description",
    "entity_type",
    "dataset_id",
    "frequency",
    "ref_area",
    "source_agency",
    "path",
    "first_time_period",
    "last_time_period",
)

# Enumerator schema: the namespaced KEY, the TITLE, then one METADATA column per field.
BDF_ENUMERATE_OUTPUT = OutputConfig(
    columns=[
        Column(name="code", role=ColumnRole.KEY, namespace="bdf"),
        Column(name="title", role=ColumnRole.TITLE),
        *(Column(name=field, role=ColumnRole.METADATA) for field in _ENUMERATE_METADATA_FIELDS),
    ]
)

# Fetch schema: one (key, title, date, value) row per observation.
BDF_FETCH_OUTPUT = OutputConfig(
    columns=[
        Column(name="key", role=ColumnRole.KEY, namespace="bdf"),
        Column(name="title", role=ColumnRole.TITLE),
        Column(name="date", dtype="datetime", role=ColumnRole.DATA),
        Column(name="value", dtype="numeric", role=ColumnRole.DATA),
    ]
)

# Column names in declared order, derived from the schemas so they cannot drift.
ENUMERATE_COLUMNS: tuple[str, ...] = tuple(column.name for column in BDF_ENUMERATE_OUTPUT.columns)
FETCH_COLUMNS: tuple[str, ...] = tuple(column.name for column in BDF_FETCH_OUTPUT.columns)
47
+
48
+ __all__ = [
49
+ "BDF_ENUMERATE_OUTPUT",
50
+ "BDF_FETCH_OUTPUT",
51
+ "ENUMERATE_COLUMNS",
52
+ "FETCH_COLUMNS",
53
+ ]
File without changes
@@ -0,0 +1,37 @@
1
+ """Semantic search over the published Banque de France (BdF) catalog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from parsimony.catalog.search import CatalogSearchParams, make_local_search_connector
6
+ from parsimony.result import Column, ColumnRole, OutputConfig
7
+
8
+ from parsimony_bdf.catalog_build import build_bdf_catalog
9
+
10
+ BdfSearchParams = CatalogSearchParams
11
+
12
+ PARSIMONY_BDF_CATALOG_URL_ENV = "PARSIMONY_BDF_CATALOG_URL"
13
+
14
# Search results expose the catalog code, its title and the match score.
BDF_SEARCH_OUTPUT = OutputConfig(
    columns=[
        Column(name="code", role=ColumnRole.KEY, namespace="bdf"),
        Column(name="title", role=ColumnRole.TITLE),
        Column(name="score", role=ColumnRole.DATA),
    ]
)

# Connector description text, passed verbatim to the search factory.
_BDF_SEARCH_DESCRIPTION = (
    "Semantic-search the Banque de France (BdF) Webstat catalog of French "
    "macroeconomic, monetary and financial time series. Returns ranked SDMX "
    "series codes (and dataset:<id> group rows); pass a series code to "
    "bdf_fetch(key=...) to retrieve its observations."
)

# The catalog location defaults to ``hf://parsimony-dev/bdf``; the env var named
# by ``PARSIMONY_BDF_CATALOG_URL_ENV`` is handed to the factory alongside it.
bdf_search = make_local_search_connector(
    provider="bdf",
    default_url="hf://parsimony-dev/bdf",
    catalog_url_env_var=PARSIMONY_BDF_CATALOG_URL_ENV,
    build_catalog=build_bdf_catalog,
    tags=["macro", "fr", "tool"],
    description=_BDF_SEARCH_DESCRIPTION,
    output_columns=BDF_SEARCH_OUTPUT.columns,
)
36
+
37
+ __all__ = ["PARSIMONY_BDF_CATALOG_URL_ENV", "BdfSearchParams", "bdf_search"]
@@ -0,0 +1,80 @@
1
[project]
name = "parsimony-bdf"
version = "0.0.1"
description = "Banque de France connector for the parsimony framework"
authors = [{ name = "Ockham.sh", email = "team@ockham.sh" }]
license = "Apache-2.0"
readme = "README.md"
requires-python = ">=3.11"
keywords = ["finance", "data", "connectors", "parsimony", "bdf"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Financial and Insurance Industry",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Office/Business :: Financial",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
# NOTE(review): parsimony_bdf.connectors.enumerate imports httpx directly, but
# httpx is not declared here — presumably it arrives transitively through
# parsimony-core / parsimony-shared; confirm, or declare it explicitly.
dependencies = [
    "parsimony-core[catalog]>=0.0.1",
    "parsimony-shared>=0.0.1",
    "pydantic>=2.11.1,<3",
    "pandas>=2.3.0,<3",
]

[project.optional-dependencies]
dev = [
    "pytest>=9.0.3",
    "pytest-cov>=5.0",
    "respx>=0.22.0",
    "ruff>=0.15.10",
    "mypy>=1.10",
]

[project.urls]
Homepage = "https://www.banque-france.fr"
Repository = "https://github.com/ockham-sh/parsimony-connectors"
Issues = "https://github.com/ockham-sh/parsimony-connectors/issues"

# Registers the parsimony_bdf package under the "parsimony.providers"
# entry-point group.
[project.entry-points."parsimony.providers"]
bdf = "parsimony_bdf"


[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["parsimony_bdf"]

[tool.hatch.build.targets.sdist]
include = ["parsimony_bdf", "README.md", "LICENSE", "CHANGELOG.md"]

[tool.ruff]
target-version = "py311"
line-length = 120

[tool.ruff.lint]
select = ["E", "F", "I", "UP", "B", "SIM"]

[tool.ruff.lint.per-file-ignores]
# Publish scripts call logging.basicConfig() before importing parsimony so
# the kernel's INFO logs surface during the long-running publish pipeline.
"scripts/*" = ["E402"]

[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_ignores = true
ignore_missing_imports = true

[tool.pytest.ini_options]
# Tests marked "integration" are deselected by default via -m 'not integration'.
addopts = "--import-mode=importlib -m 'not integration'"
markers = [
    "integration: hits live APIs (may be slow, requires env vars)",
]