pea-audit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pea_audit-0.1.0/.gitignore +39 -0
- pea_audit-0.1.0/LICENSE +21 -0
- pea_audit-0.1.0/PKG-INFO +239 -0
- pea_audit-0.1.0/README.md +180 -0
- pea_audit-0.1.0/pea_audit/__init__.py +56 -0
- pea_audit-0.1.0/pea_audit/cache.py +55 -0
- pea_audit-0.1.0/pea_audit/compare.py +33 -0
- pea_audit-0.1.0/pea_audit/core.py +174 -0
- pea_audit-0.1.0/pea_audit/isin.py +52 -0
- pea_audit-0.1.0/pea_audit/llm/__init__.py +10 -0
- pea_audit-0.1.0/pea_audit/llm/base.py +47 -0
- pea_audit-0.1.0/pea_audit/llm/ollama.py +171 -0
- pea_audit-0.1.0/pea_audit/pdf.py +25 -0
- pea_audit-0.1.0/pea_audit/prompts/audit_v1.md +43 -0
- pea_audit-0.1.0/pea_audit/prompts/audit_v2.md +55 -0
- pea_audit-0.1.0/pea_audit/prompts.py +51 -0
- pea_audit-0.1.0/pea_audit/py.typed +0 -0
- pea_audit-0.1.0/pea_audit/sources/__init__.py +22 -0
- pea_audit-0.1.0/pea_audit/sources/amundi.py +43 -0
- pea_audit-0.1.0/pea_audit/sources/base.py +47 -0
- pea_audit-0.1.0/pea_audit/sources/blackrock.py +21 -0
- pea_audit-0.1.0/pea_audit/sources/bnp.py +16 -0
- pea_audit-0.1.0/pea_audit/sources/vanguard.py +13 -0
- pea_audit-0.1.0/pea_audit/ticker.py +116 -0
- pea_audit-0.1.0/pyproject.toml +68 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Secrets — ne jamais committer
|
|
2
|
+
.env
|
|
3
|
+
|
|
4
|
+
# Caches d'audit (verdicts + PDFs téléchargés)
|
|
5
|
+
cache/
|
|
6
|
+
|
|
7
|
+
# Eval PDFs téléchargés (re-téléchargeables depuis les URLs des cases)
|
|
8
|
+
evals/data/
|
|
9
|
+
|
|
10
|
+
# Artefacts de test
|
|
11
|
+
*.pdf
|
|
12
|
+
!samples/*.pdf
|
|
13
|
+
*.png
|
|
14
|
+
|
|
15
|
+
# Python
|
|
16
|
+
__pycache__/
|
|
17
|
+
*.pyc
|
|
18
|
+
*.egg-info/
|
|
19
|
+
.venv/
|
|
20
|
+
venv/
|
|
21
|
+
.pytest_cache/
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
.ruff_cache/
|
|
24
|
+
|
|
25
|
+
# Build artifacts
|
|
26
|
+
dist/
|
|
27
|
+
build/
|
|
28
|
+
|
|
29
|
+
# macOS
|
|
30
|
+
.DS_Store
|
|
31
|
+
|
|
32
|
+
# Streamlit
|
|
33
|
+
.streamlit/secrets.toml
|
|
34
|
+
|
|
35
|
+
# Playwright traces
|
|
36
|
+
.playwright-mcp/
|
|
37
|
+
|
|
38
|
+
# User's real portfolio (template lives in positions.csv.example)
|
|
39
|
+
positions.csv
|
pea_audit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Andrey Kanmegne
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pea_audit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pea-audit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Audit PEA-eligibility of ETF KID documents with a vision LLM. French PEA (Plan d'Épargne en Actions) rules built in.
|
|
5
|
+
Project-URL: Homepage, https://github.com/AndreLiar/pea-audit
|
|
6
|
+
Project-URL: Repository, https://github.com/AndreLiar/pea-audit
|
|
7
|
+
Project-URL: Issues, https://github.com/AndreLiar/pea-audit/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/AndreLiar/pea-audit/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Andrey Kanmegne
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Andrey Kanmegne
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: audit,etf,finance,gemma,kid,llm,ollama,pea,priips
|
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Natural Language :: French
|
|
38
|
+
Classifier: Programming Language :: Python :: 3
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
42
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
43
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
44
|
+
Classifier: Typing :: Typed
|
|
45
|
+
Requires-Python: >=3.10
|
|
46
|
+
Requires-Dist: ollama>=0.4.0
|
|
47
|
+
Requires-Dist: pypdfium2>=4.30.0
|
|
48
|
+
Requires-Dist: requests>=2.30
|
|
49
|
+
Requires-Dist: tenacity>=9.0
|
|
50
|
+
Provides-Extra: dev
|
|
51
|
+
Requires-Dist: langfuse>=4.0; extra == 'dev'
|
|
52
|
+
Requires-Dist: python-dotenv>=1.0; extra == 'dev'
|
|
53
|
+
Requires-Dist: pyyaml>=6.0; extra == 'dev'
|
|
54
|
+
Provides-Extra: evals
|
|
55
|
+
Requires-Dist: pyyaml>=6.0; extra == 'evals'
|
|
56
|
+
Provides-Extra: observability
|
|
57
|
+
Requires-Dist: langfuse>=4.0; extra == 'observability'
|
|
58
|
+
Description-Content-Type: text/markdown
|
|
59
|
+
|
|
60
|
+
# pea-audit
|
|
61
|
+
|
|
62
|
+
[PyPI version](https://pypi.org/project/pea-audit/)
|
|
63
|
+
[Supported Python versions](https://pypi.org/project/pea-audit/)
|
|
64
|
+
[License: MIT](LICENSE)
|
|
65
|
+
|
|
66
|
+
Audit French **PEA** (Plan d'Épargne en Actions) eligibility of ETFs by reading their **KID** (Key Information Document) with a vision LLM. Tells you whether a fund is actually eligible for a French PEA account — with verbatim citations from the document.
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
$ python audit_cli.py samples/amundi_pea_monde_kid.pdf
|
|
70
|
+
📄 Audit de : samples/amundi_pea_monde_kid.pdf
|
|
71
|
+
|
|
72
|
+
✅ ÉLIGIBLE PEA (confiance : high)
|
|
73
|
+
|
|
74
|
+
Émetteur : Amundi
|
|
75
|
+
ISIN : FR001400U5Q4
|
|
76
|
+
Indice : MSCI World Index EUR
|
|
77
|
+
Réplication : synthetic_swap
|
|
78
|
+
|
|
79
|
+
Le fonds est éligible au PEA car il utilise une réplication synthétique
|
|
80
|
+
via swap (IFT) avec un panier d'actions européennes ≥75%.
|
|
81
|
+
|
|
82
|
+
Preuves :
|
|
83
|
+
p.1 — « Le Fonds est éligible au Plan d'Épargne en Actions français (PEA) ... »
|
|
84
|
+
p.1 — « La performance sera échangée contre celle de l'Indice de Référence ... »
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Why
|
|
88
|
+
|
|
89
|
+
PEA eligibility is opaque and *changes silently* — issuers re-domicile funds, change swap counterparties, switch to ESG-screened variants, and rename funds (e.g. Amundi PEA Nasdaq-100 silently became "Amundi PEA US Tech Screened" under the same ticker). Brokers don't always flag this. `pea-audit` reads each fund's KID directly and tells you what the document actually says, with quotes you can verify.
|
|
90
|
+
|
|
91
|
+
## Install
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
pip install pea-audit
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Optional extras:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install 'pea-audit[observability]' # adds Langfuse for LLM tracing
|
|
101
|
+
pip install 'pea-audit[evals]' # adds pyyaml for the eval suite
|
|
102
|
+
pip install 'pea-audit[dev]' # everything above + python-dotenv
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Quickstart
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from pathlib import Path
|
|
109
|
+
from pea_audit import audit_pdf, VerdictCache
|
|
110
|
+
from pea_audit.llm import OllamaCloudClient
|
|
111
|
+
|
|
112
|
+
# Default backend: Ollama Cloud running Gemma 4 31b
|
|
113
|
+
llm = OllamaCloudClient(api_key="sk-...") # from https://ollama.com/settings/keys
|
|
114
|
+
|
|
115
|
+
# Cache is opt-in. Library never writes to disk unless you supply one.
|
|
116
|
+
cache = VerdictCache(Path("./cache"))
|
|
117
|
+
|
|
118
|
+
verdict = audit_pdf("path/to/kid.pdf", llm=llm, cache=cache)
|
|
119
|
+
|
|
120
|
+
print(verdict.eligible) # "yes" | "no" | "uncertain"
|
|
121
|
+
print(verdict.replication) # "physical" | "synthetic_swap" | "unknown"
|
|
122
|
+
print(verdict.isin) # deterministic — extracted from PDF text + Luhn-validated
|
|
123
|
+
for c in verdict.evidence:
|
|
124
|
+
print(f" p.{c.page}: « {c.quote} »")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Audit by ticker (built-in URL registry)
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from pathlib import Path

from pea_audit import audit_ticker, VerdictCache
|
|
131
|
+
from pea_audit.llm import OllamaCloudClient
|
|
132
|
+
|
|
133
|
+
llm = OllamaCloudClient(api_key="sk-...")
|
|
134
|
+
cache = VerdictCache(Path("./cache"))
|
|
135
|
+
|
|
136
|
+
result = audit_ticker("EWLD.PA", llm=llm, kid_dir=Path("./kids"), cache=cache)
|
|
137
|
+
print(result.verdict.eligible) # "yes"
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Built-ins ship for the most common French ETFs (Amundi PEA range, BNP Paribas Easy). Add more:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from pea_audit.sources import register_source, KIDSource
|
|
144
|
+
|
|
145
|
+
register_source(KIDSource(
|
|
146
|
+
ticker="LYX.PA",
|
|
147
|
+
isin="FR0010411884",
|
|
148
|
+
url="https://www.lyxoretf.fr/.../kid.pdf",
|
|
149
|
+
issuer="Lyxor",
|
|
150
|
+
))
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Architecture
|
|
154
|
+
|
|
155
|
+
Two protocols make this library extensible without forking:
|
|
156
|
+
|
|
157
|
+
### `VisionLLM` — swap the model
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from typing import Any, Protocol
|
|
161
|
+
|
|
162
|
+
class VisionLLM(Protocol):
|
|
163
|
+
def analyze_images(
|
|
164
|
+
self,
|
|
165
|
+
images: list[bytes],
|
|
166
|
+
prompt: str,
|
|
167
|
+
schema: dict[str, Any],
|
|
168
|
+
system: str | None = None,
|
|
169
|
+
) -> dict[str, Any]: ...
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
The default `OllamaCloudClient` wraps Gemma 4 via Ollama Cloud with `tenacity` retries on transient errors and optional Langfuse tracing. Anyone can implement this protocol to plug in Claude vision, GPT-4o, Gemini, a local Ollama instance, etc.
|
|
173
|
+
|
|
174
|
+
### `KIDSource` — add issuers
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from pea_audit.sources import register_source, KIDSource, get_source, all_sources
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
A registry of ticker → KID URL mappings. It ships built-ins for Amundi (URL pattern) and BNP Paribas (per-fund UUIDs). URL helpers for BlackRock/iShares and Vanguard are importable but are not auto-registered: most of their funds are PEA-ineligible, so they exist mainly for testing the negative path.
|
|
181
|
+
|
|
182
|
+
## Eval baseline
|
|
183
|
+
|
|
184
|
+
The repo ships **13 regression cases** under `evals/cases/*.yaml` — 7 PEA-eligible synthetic-swap, 6 ineligible physical non-EEA — covering Amundi, BNP, BlackRock/iShares, Vanguard. Current baseline on Gemma 4 31b-cloud: **13/13 (100%)**. Run before any prompt or model change:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
python evals/run.py
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Production niceties
|
|
191
|
+
|
|
192
|
+
- **Retries on transient errors** — `tenacity` with exponential backoff (1s → 4s → 16s), only on network/timeout/5xx (not on 4xx or schema errors that won't self-resolve)
|
|
193
|
+
- **Optional observability** — Langfuse traces per LLM call (model, input/output, tokens, latency). Activates when `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY` are set, silent no-op otherwise
|
|
194
|
+
- **Deterministic ISINs** — vision misreads of the 12-char ISIN string are corrected by regex-extracting candidates from the PDF text layer and validating with the Luhn check digit
|
|
195
|
+
- **Versioned prompts** — `pea_audit/prompts/audit_v{N}.md` files, selected via `prompt_version=` parameter; rollback is a config change, not a code edit
|
|
196
|
+
- **Hard vs soft fields in diffs** — `compare_verdicts()` defaults to comparing only categorical fields (`eligible`, `replication`, `isin`) so monthly re-audit doesn't false-fire on LLM rephrasing of free-text issuer/index names
|
|
197
|
+
|
|
198
|
+
## Reference app: ETFTracker
|
|
199
|
+
|
|
200
|
+
The repo also ships a personal-tool app that consumes the library: a French ETF portfolio tracker with a Streamlit dashboard, monthly re-audit cron, FastAPI service, and Docker compose deployment. See `ETFTracker.md` (French) for that side.
|
|
201
|
+
|
|
202
|
+
To run it: `cp positions.csv.example positions.csv`, edit with your own holdings, `cp .env.example .env` with your Ollama key, then `docker compose up -d web` or `streamlit run dashboard.py`.
|
|
203
|
+
|
|
204
|
+
## Publishing checklist (maintainer)
|
|
205
|
+
|
|
206
|
+
PyPI publication uses [trusted publishers](https://docs.pypi.org/trusted-publishers/) (OIDC) — no API token secret needed in CI.
|
|
207
|
+
|
|
208
|
+
One-time setup:
|
|
209
|
+
|
|
210
|
+
1. Create the project on https://pypi.org (or first on https://test.pypi.org for a dry-run)
|
|
211
|
+
2. Add a Trusted Publisher pointing to `release.yml` in this repo, environment `pypi`
|
|
212
|
+
3. In GitHub repo settings, create the `pypi` environment (no secrets needed)
|
|
213
|
+
|
|
214
|
+
Per-release:
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
# 1. Update version in pyproject.toml + add entry to CHANGELOG.md
|
|
218
|
+
# 2. Verify it builds + tests pass
|
|
219
|
+
python -m build
|
|
220
|
+
pytest tests/
|
|
221
|
+
|
|
222
|
+
# 3. Tag and push — CI takes over
|
|
223
|
+
git tag v0.1.0
|
|
224
|
+
git push origin v0.1.0
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
The `release.yml` workflow builds the wheel + sdist and publishes to PyPI automatically on tag push.
|
|
228
|
+
|
|
229
|
+
## Contributing
|
|
230
|
+
|
|
231
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
[MIT](LICENSE).
|
|
236
|
+
|
|
237
|
+
## Disclaimer
|
|
238
|
+
|
|
239
|
+
This is a personal-finance tool. The LLM-judged eligibility verdict is informational, not regulatory advice — always cross-check against the actual DIC/KID before buying.
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# pea-audit
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/pea-audit/)
|
|
4
|
+
[Supported Python versions](https://pypi.org/project/pea-audit/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
|
|
7
|
+
Audit French **PEA** (Plan d'Épargne en Actions) eligibility of ETFs by reading their **KID** (Key Information Document) with a vision LLM. Tells you whether a fund is actually eligible for a French PEA account — with verbatim citations from the document.
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
$ python audit_cli.py samples/amundi_pea_monde_kid.pdf
|
|
11
|
+
📄 Audit de : samples/amundi_pea_monde_kid.pdf
|
|
12
|
+
|
|
13
|
+
✅ ÉLIGIBLE PEA (confiance : high)
|
|
14
|
+
|
|
15
|
+
Émetteur : Amundi
|
|
16
|
+
ISIN : FR001400U5Q4
|
|
17
|
+
Indice : MSCI World Index EUR
|
|
18
|
+
Réplication : synthetic_swap
|
|
19
|
+
|
|
20
|
+
Le fonds est éligible au PEA car il utilise une réplication synthétique
|
|
21
|
+
via swap (IFT) avec un panier d'actions européennes ≥75%.
|
|
22
|
+
|
|
23
|
+
Preuves :
|
|
24
|
+
p.1 — « Le Fonds est éligible au Plan d'Épargne en Actions français (PEA) ... »
|
|
25
|
+
p.1 — « La performance sera échangée contre celle de l'Indice de Référence ... »
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Why
|
|
29
|
+
|
|
30
|
+
PEA eligibility is opaque and *changes silently* — issuers re-domicile funds, change swap counterparties, switch to ESG-screened variants, and rename funds (e.g. Amundi PEA Nasdaq-100 silently became "Amundi PEA US Tech Screened" under the same ticker). Brokers don't always flag this. `pea-audit` reads each fund's KID directly and tells you what the document actually says, with quotes you can verify.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install pea-audit
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Optional extras:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install 'pea-audit[observability]' # adds Langfuse for LLM tracing
|
|
42
|
+
pip install 'pea-audit[evals]' # adds pyyaml for the eval suite
|
|
43
|
+
pip install 'pea-audit[dev]' # everything above + python-dotenv
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Quickstart
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from pathlib import Path
|
|
50
|
+
from pea_audit import audit_pdf, VerdictCache
|
|
51
|
+
from pea_audit.llm import OllamaCloudClient
|
|
52
|
+
|
|
53
|
+
# Default backend: Ollama Cloud running Gemma 4 31b
|
|
54
|
+
llm = OllamaCloudClient(api_key="sk-...") # from https://ollama.com/settings/keys
|
|
55
|
+
|
|
56
|
+
# Cache is opt-in. Library never writes to disk unless you supply one.
|
|
57
|
+
cache = VerdictCache(Path("./cache"))
|
|
58
|
+
|
|
59
|
+
verdict = audit_pdf("path/to/kid.pdf", llm=llm, cache=cache)
|
|
60
|
+
|
|
61
|
+
print(verdict.eligible) # "yes" | "no" | "uncertain"
|
|
62
|
+
print(verdict.replication) # "physical" | "synthetic_swap" | "unknown"
|
|
63
|
+
print(verdict.isin) # deterministic — extracted from PDF text + Luhn-validated
|
|
64
|
+
for c in verdict.evidence:
|
|
65
|
+
print(f" p.{c.page}: « {c.quote} »")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Audit by ticker (built-in URL registry)
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from pathlib import Path

from pea_audit import audit_ticker, VerdictCache
|
|
72
|
+
from pea_audit.llm import OllamaCloudClient
|
|
73
|
+
|
|
74
|
+
llm = OllamaCloudClient(api_key="sk-...")
|
|
75
|
+
cache = VerdictCache(Path("./cache"))
|
|
76
|
+
|
|
77
|
+
result = audit_ticker("EWLD.PA", llm=llm, kid_dir=Path("./kids"), cache=cache)
|
|
78
|
+
print(result.verdict.eligible) # "yes"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Built-ins ship for the most common French ETFs (Amundi PEA range, BNP Paribas Easy). Add more:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from pea_audit.sources import register_source, KIDSource
|
|
85
|
+
|
|
86
|
+
register_source(KIDSource(
|
|
87
|
+
ticker="LYX.PA",
|
|
88
|
+
isin="FR0010411884",
|
|
89
|
+
url="https://www.lyxoretf.fr/.../kid.pdf",
|
|
90
|
+
issuer="Lyxor",
|
|
91
|
+
))
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Architecture
|
|
95
|
+
|
|
96
|
+
Two protocols make this library extensible without forking:
|
|
97
|
+
|
|
98
|
+
### `VisionLLM` — swap the model
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from typing import Any, Protocol
|
|
102
|
+
|
|
103
|
+
class VisionLLM(Protocol):
|
|
104
|
+
def analyze_images(
|
|
105
|
+
self,
|
|
106
|
+
images: list[bytes],
|
|
107
|
+
prompt: str,
|
|
108
|
+
schema: dict[str, Any],
|
|
109
|
+
system: str | None = None,
|
|
110
|
+
) -> dict[str, Any]: ...
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
The default `OllamaCloudClient` wraps Gemma 4 via Ollama Cloud with `tenacity` retries on transient errors and optional Langfuse tracing. Anyone can implement this protocol to plug in Claude vision, GPT-4o, Gemini, a local Ollama instance, etc.
|
|
114
|
+
|
|
115
|
+
### `KIDSource` — add issuers
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from pea_audit.sources import register_source, KIDSource, get_source, all_sources
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
A registry of ticker → KID URL mappings. It ships built-ins for Amundi (URL pattern) and BNP Paribas (per-fund UUIDs). URL helpers for BlackRock/iShares and Vanguard are importable but are not auto-registered: most of their funds are PEA-ineligible, so they exist mainly for testing the negative path.
|
|
122
|
+
|
|
123
|
+
## Eval baseline
|
|
124
|
+
|
|
125
|
+
The repo ships **13 regression cases** under `evals/cases/*.yaml` — 7 PEA-eligible synthetic-swap, 6 ineligible physical non-EEA — covering Amundi, BNP, BlackRock/iShares, Vanguard. Current baseline on Gemma 4 31b-cloud: **13/13 (100%)**. Run before any prompt or model change:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
python evals/run.py
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Production niceties
|
|
132
|
+
|
|
133
|
+
- **Retries on transient errors** — `tenacity` with exponential backoff (1s → 4s → 16s), only on network/timeout/5xx (not on 4xx or schema errors that won't self-resolve)
|
|
134
|
+
- **Optional observability** — Langfuse traces per LLM call (model, input/output, tokens, latency). Activates when `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY` are set, silent no-op otherwise
|
|
135
|
+
- **Deterministic ISINs** — vision misreads of the 12-char ISIN string are corrected by regex-extracting candidates from the PDF text layer and validating with the Luhn check digit
|
|
136
|
+
- **Versioned prompts** — `pea_audit/prompts/audit_v{N}.md` files, selected via `prompt_version=` parameter; rollback is a config change, not a code edit
|
|
137
|
+
- **Hard vs soft fields in diffs** — `compare_verdicts()` defaults to comparing only categorical fields (`eligible`, `replication`, `isin`) so monthly re-audit doesn't false-fire on LLM rephrasing of free-text issuer/index names
|
|
138
|
+
|
|
139
|
+
## Reference app: ETFTracker
|
|
140
|
+
|
|
141
|
+
The repo also ships a personal-tool app that consumes the library: a French ETF portfolio tracker with a Streamlit dashboard, monthly re-audit cron, FastAPI service, and Docker compose deployment. See `ETFTracker.md` (French) for that side.
|
|
142
|
+
|
|
143
|
+
To run it: `cp positions.csv.example positions.csv`, edit with your own holdings, `cp .env.example .env` with your Ollama key, then `docker compose up -d web` or `streamlit run dashboard.py`.
|
|
144
|
+
|
|
145
|
+
## Publishing checklist (maintainer)
|
|
146
|
+
|
|
147
|
+
PyPI publication uses [trusted publishers](https://docs.pypi.org/trusted-publishers/) (OIDC) — no API token secret needed in CI.
|
|
148
|
+
|
|
149
|
+
One-time setup:
|
|
150
|
+
|
|
151
|
+
1. Create the project on https://pypi.org (or first on https://test.pypi.org for a dry-run)
|
|
152
|
+
2. Add a Trusted Publisher pointing to `release.yml` in this repo, environment `pypi`
|
|
153
|
+
3. In GitHub repo settings, create the `pypi` environment (no secrets needed)
|
|
154
|
+
|
|
155
|
+
Per-release:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
# 1. Update version in pyproject.toml + add entry to CHANGELOG.md
|
|
159
|
+
# 2. Verify it builds + tests pass
|
|
160
|
+
python -m build
|
|
161
|
+
pytest tests/
|
|
162
|
+
|
|
163
|
+
# 3. Tag and push — CI takes over
|
|
164
|
+
git tag v0.1.0
|
|
165
|
+
git push origin v0.1.0
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
The `release.yml` workflow builds the wheel + sdist and publishes to PyPI automatically on tag push.
|
|
169
|
+
|
|
170
|
+
## Contributing
|
|
171
|
+
|
|
172
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
173
|
+
|
|
174
|
+
## License
|
|
175
|
+
|
|
176
|
+
[MIT](LICENSE).
|
|
177
|
+
|
|
178
|
+
## Disclaimer
|
|
179
|
+
|
|
180
|
+
This is a personal-finance tool. The LLM-judged eligibility verdict is informational, not regulatory advice — always cross-check against the actual DIC/KID before buying.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""pea-audit — PEA-eligibility auditor for ETF KID documents.
|
|
2
|
+
|
|
3
|
+
Audits ETF KID/DIC PDFs against French PEA (Plan d'Épargne en Actions)
|
|
4
|
+
rules using a vision LLM. Ships with pluggable LLM backends, a registry
|
|
5
|
+
of KID source URLs by ticker, versioned prompts, and a 13-case regression
|
|
6
|
+
eval suite.
|
|
7
|
+
|
|
8
|
+
Quickstart:
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from pea_audit import audit_pdf, VerdictCache
|
|
11
|
+
from pea_audit.llm import OllamaCloudClient
|
|
12
|
+
|
|
13
|
+
llm = OllamaCloudClient(api_key="sk-...")
|
|
14
|
+
cache = VerdictCache(Path("./cache"))
|
|
15
|
+
|
|
16
|
+
verdict = audit_pdf("kid.pdf", llm=llm, cache=cache)
|
|
17
|
+
print(verdict.eligible, verdict.replication, verdict.isin)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from .cache import VerdictCache
|
|
21
|
+
from .compare import HARD_FIELDS, SOFT_FIELDS, compare_verdicts
|
|
22
|
+
from .core import (
|
|
23
|
+
DEFAULT_PROMPT_VERSION,
|
|
24
|
+
PEA_VERDICT_SCHEMA,
|
|
25
|
+
Citation,
|
|
26
|
+
PeaVerdict,
|
|
27
|
+
audit_pdf,
|
|
28
|
+
)
|
|
29
|
+
from .isin import extract_isins, isin_check_digit_valid
|
|
30
|
+
from .ticker import TickerAuditResult, audit_ticker, get_cached_verdict
|
|
31
|
+
|
|
32
|
+
__version__ = "0.1.0"
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"__version__",
|
|
36
|
+
# Core audit
|
|
37
|
+
"audit_pdf",
|
|
38
|
+
"audit_ticker",
|
|
39
|
+
"get_cached_verdict",
|
|
40
|
+
# Dataclasses
|
|
41
|
+
"PeaVerdict",
|
|
42
|
+
"Citation",
|
|
43
|
+
"TickerAuditResult",
|
|
44
|
+
# Cache
|
|
45
|
+
"VerdictCache",
|
|
46
|
+
# Comparison (for re-audit cron)
|
|
47
|
+
"compare_verdicts",
|
|
48
|
+
"HARD_FIELDS",
|
|
49
|
+
"SOFT_FIELDS",
|
|
50
|
+
# Schema (for advanced users with custom LLMs)
|
|
51
|
+
"PEA_VERDICT_SCHEMA",
|
|
52
|
+
"DEFAULT_PROMPT_VERSION",
|
|
53
|
+
# ISIN helpers
|
|
54
|
+
"extract_isins",
|
|
55
|
+
"isin_check_digit_valid",
|
|
56
|
+
]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Opt-in file-based verdict cache.
|
|
2
|
+
|
|
3
|
+
The library NEVER writes to disk unless the app explicitly passes a
|
|
4
|
+
`VerdictCache(cache_dir=...)`. Keyed by sha256(pdf_bytes), so the same
|
|
5
|
+
PDF returned from anywhere yields the same cache hit.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
from dataclasses import asdict
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from .core import Citation, PeaVerdict
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class VerdictCache:
    """File-based cache for `PeaVerdict`, keyed by sha256(pdf_bytes)[:16].

    Each verdict is stored as one UTF-8 encoded JSON file named
    ``<key>.json`` inside the cache directory. The directory is only
    created on the first `put`, so constructing a cache or calling `get`
    never writes to disk.

    Example:
        >>> cache = VerdictCache(Path("./cache/audits"))
        >>> v = audit_pdf("doc.pdf", llm=client, cache=cache)
        >>> # next call with the same PDF returns instantly, no LLM hit.
    """

    def __init__(self, cache_dir: Path):
        """Remember the cache directory; nothing is created on disk here."""
        self._dir = Path(cache_dir)

    @staticmethod
    def cache_key(pdf_bytes: bytes) -> str:
        """Return the first 16 hex characters of the SHA-256 of *pdf_bytes*."""
        return hashlib.sha256(pdf_bytes).hexdigest()[:16]

    def _path(self, key: str) -> Path:
        """Return the JSON file path that stores the entry for *key*."""
        return self._dir / f"{key}.json"

    def get(self, pdf_bytes: bytes) -> PeaVerdict | None:
        """Return the cached verdict for *pdf_bytes*, or None on a miss.

        A missing entry, an entry that is not valid UTF-8 JSON, or an entry
        whose top-level value is not a JSON object is reported as a miss
        instead of raising.
        """
        entry = self._path(self.cache_key(pdf_bytes))
        try:
            # Read explicitly as UTF-8: `put` writes non-ASCII characters
            # verbatim (ensure_ascii=False), so the platform default
            # encoding cannot be relied upon.
            data = json.loads(entry.read_text(encoding="utf-8"))
        except (FileNotFoundError, NotADirectoryError):
            return None
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError: a corrupt,
            # truncated or differently-encoded file is treated as a miss.
            return None
        if not isinstance(data, dict):
            return None
        data["evidence"] = [Citation(**c) for c in data.get("evidence", [])]
        return PeaVerdict(**data)

    def put(self, pdf_bytes: bytes, verdict: PeaVerdict) -> None:
        """Store *verdict* under the key derived from *pdf_bytes*.

        Creates the cache directory (and parents) if needed and overwrites
        any existing entry for the same PDF bytes.
        """
        self._dir.mkdir(parents=True, exist_ok=True)
        self._path(self.cache_key(pdf_bytes)).write_text(
            json.dumps(asdict(verdict), ensure_ascii=False, indent=2),
            # Must match the decoding used in `get`.
            encoding="utf-8",
        )

    def get_by_path(self, pdf_path: Path) -> PeaVerdict | None:
        """Convenience: load by PDF file path (for badge lookups).

        Returns None when the PDF file does not exist or has no cache entry.
        """
        try:
            # Path() also lets callers pass a plain string path.
            pdf_bytes = Path(pdf_path).read_bytes()
        except (FileNotFoundError, NotADirectoryError):
            return None
        return self.get(pdf_bytes)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Verdict comparison — for re-audit cron / change detection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .core import PeaVerdict
|
|
6
|
+
|
|
7
|
+
# Hard fields: categorical / deterministic values, so a difference between two
# LLM runs is a real signal.
HARD_FIELDS = ("eligible", "replication", "isin")

# Soft fields: free text worded by the LLM, which drifts between runs
# ("BNP Paribas" vs "BNP Paribas Asset Management"). Skipped by default.
SOFT_FIELDS = ("issuer", "underlying_index")


def compare_verdicts(
    old: PeaVerdict,
    new: PeaVerdict,
    include_soft: bool = False,
) -> list[str]:
    """List the fields whose value changed between *old* and *new*.

    Only `HARD_FIELDS` are compared unless `include_soft` is true, in which
    case `SOFT_FIELDS` are compared as well (handy when debugging, noisy in
    a recurring cron).

    Returns one ``"<field>: <old repr> → <new repr>"`` string per changed
    field, hard fields first; the list is empty when nothing differs.
    """
    names = HARD_FIELDS + SOFT_FIELDS if include_soft else HARD_FIELDS
    # Read both sides once per field, in declaration order.
    pairs = ((name, getattr(old, name), getattr(new, name)) for name in names)
    return [
        f"{name}: {before!r} → {after!r}"
        for name, before, after in pairs
        if before != after
    ]
|