finamt 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- finamt-0.5.0/LICENSE +21 -0
- finamt-0.5.0/MANIFEST.in +1 -0
- finamt-0.5.0/PKG-INFO +421 -0
- finamt-0.5.0/README.md +363 -0
- finamt-0.5.0/pyproject.toml +91 -0
- finamt-0.5.0/setup.cfg +4 -0
- finamt-0.5.0/src/finamt.egg-info/PKG-INFO +421 -0
- finamt-0.5.0/src/finamt.egg-info/SOURCES.txt +47 -0
- finamt-0.5.0/src/finamt.egg-info/dependency_links.txt +1 -0
- finamt-0.5.0/src/finamt.egg-info/entry_points.txt +2 -0
- finamt-0.5.0/src/finamt.egg-info/requires.txt +29 -0
- finamt-0.5.0/src/finamt.egg-info/top_level.txt +1 -0
- finamt-0.5.0/src/finanzamt/__init__.py +50 -0
- finamt-0.5.0/src/finanzamt/__version__.py +5 -0
- finamt-0.5.0/src/finanzamt/agents/__init__.py +20 -0
- finamt-0.5.0/src/finanzamt/agents/agent.py +214 -0
- finamt-0.5.0/src/finanzamt/agents/config.py +188 -0
- finamt-0.5.0/src/finanzamt/agents/llm_caller.py +118 -0
- finamt-0.5.0/src/finanzamt/agents/pipeline.py +287 -0
- finamt-0.5.0/src/finanzamt/agents/prompts.py +136 -0
- finamt-0.5.0/src/finanzamt/cli.py +503 -0
- finamt-0.5.0/src/finanzamt/exceptions.py +46 -0
- finamt-0.5.0/src/finanzamt/models.py +348 -0
- finamt-0.5.0/src/finanzamt/ocr_processor.py +235 -0
- finamt-0.5.0/src/finanzamt/progress.py +60 -0
- finamt-0.5.0/src/finanzamt/storage/__init__.py +27 -0
- finamt-0.5.0/src/finanzamt/storage/base.py +71 -0
- finamt-0.5.0/src/finanzamt/storage/project.py +179 -0
- finamt-0.5.0/src/finanzamt/storage/sqlite.py +670 -0
- finamt-0.5.0/src/finanzamt/tax/__init__.py +16 -0
- finamt-0.5.0/src/finanzamt/tax/ustva.py +277 -0
- finamt-0.5.0/src/finanzamt/ui/api.py +621 -0
- finamt-0.5.0/src/finanzamt/ui/server.py +155 -0
- finamt-0.5.0/src/finanzamt/ui/static/.DS_Store +0 -0
- finamt-0.5.0/src/finanzamt/ui/static/assets/index-B2Qr8JXJ.css +1 -0
- finamt-0.5.0/src/finanzamt/ui/static/assets/index-CQrJNDYi.js +18 -0
- finamt-0.5.0/src/finanzamt/ui/static/finanzamt.svg +20 -0
- finamt-0.5.0/src/finanzamt/ui/static/index.html +14 -0
- finamt-0.5.0/src/finanzamt/utils.py +421 -0
- finamt-0.5.0/tests/test_agent.py +273 -0
- finamt-0.5.0/tests/test_cli_inprocess.py +108 -0
- finamt-0.5.0/tests/test_config.py +157 -0
- finamt-0.5.0/tests/test_exceptions.py +77 -0
- finamt-0.5.0/tests/test_models.py +185 -0
- finamt-0.5.0/tests/test_ocr_processor.py +303 -0
- finamt-0.5.0/tests/test_prompts.py +130 -0
- finamt-0.5.0/tests/test_storage.py +406 -0
- finamt-0.5.0/tests/test_ustva.py +361 -0
- finamt-0.5.0/tests/test_utils.py +377 -0
finamt-0.5.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yauheniya Varabyova
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
finamt-0.5.0/MANIFEST.in
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
recursive-include src/finanzamt/ui/static *
|
finamt-0.5.0/PKG-INFO
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: finamt
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: A Python library for extracting key information from receipts and preparing essential German tax return statements.
|
|
5
|
+
Author: Yauheniya Varabyova
|
|
6
|
+
Maintainer: Yauheniya Varabyova
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Repository, https://github.com/yauheniya-ai/finanzamt
|
|
9
|
+
Project-URL: Changelog, https://github.com/yauheniya-ai/finanzamt/blob/main/CHANGELOG.md
|
|
10
|
+
Keywords: finanzamt,finance,receipts,tax,statement,ocr,extraction
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
13
|
+
Classifier: Intended Audience :: Legal Industry
|
|
14
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Natural Language :: English
|
|
17
|
+
Classifier: Natural Language :: German
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
26
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
27
|
+
Classifier: Topic :: Multimedia :: Graphics :: Capture :: Digital Camera
|
|
28
|
+
Requires-Python: >=3.10
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: requests>=2.28.0
|
|
32
|
+
Requires-Dist: PyMuPDF>=1.22.0
|
|
33
|
+
Requires-Dist: paddleocr
|
|
34
|
+
Requires-Dist: paddlepaddle
|
|
35
|
+
Requires-Dist: pytesseract>=0.3.10
|
|
36
|
+
Requires-Dist: pydantic
|
|
37
|
+
Requires-Dist: pydantic-settings
|
|
38
|
+
Requires-Dist: Pillow>=9.0.0
|
|
39
|
+
Requires-Dist: numpy>=1.21.0
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: twine; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
44
|
+
Requires-Dist: pytest-mock; extra == "dev"
|
|
45
|
+
Requires-Dist: black>=22.0; extra == "dev"
|
|
46
|
+
Requires-Dist: flake8>=5.0; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=0.991; extra == "dev"
|
|
48
|
+
Requires-Dist: pre-commit>=2.20.0; extra == "dev"
|
|
49
|
+
Provides-Extra: docs
|
|
50
|
+
Requires-Dist: sphinx>=5.0; extra == "docs"
|
|
51
|
+
Requires-Dist: sphinx-rtd-theme>=1.0; extra == "docs"
|
|
52
|
+
Requires-Dist: myst-parser>=0.18; extra == "docs"
|
|
53
|
+
Provides-Extra: ui
|
|
54
|
+
Requires-Dist: fastapi>=0.110; extra == "ui"
|
|
55
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == "ui"
|
|
56
|
+
Requires-Dist: python-multipart>=0.0.9; extra == "ui"
|
|
57
|
+
Dynamic: license-file
|
|
58
|
+
|
|
59
|
+
# finanzamt
|
|
60
|
+
|
|
61
|
+
<div align="center">
|
|
62
|
+
|
|
63
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
64
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
65
|
+
[PyPI](https://pypi.org/project/finanzamt/)
|
|
66
|
+
[Tests](https://github.com/yauheniya-ai/finanzamt/actions/workflows/tests.yml)
|
|
67
|
+
[](https://github.com/yauheniya-ai/finanzamt/actions/workflows/tests.yml)
|
|
68
|
+
[Last commit](https://github.com/yauheniya-ai/finanzamt/commits/main)
|
|
69
|
+
[Downloads](https://pepy.tech/project/finanzamt)
|
|
70
|
+
[Documentation](https://readthedocs.org/projects/finanzamt/)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
<img src="https://api.iconify.design/noto-v1:flag-for-flag-united-states.svg" width="16" height="16"> English • <img src="https://api.iconify.design/noto-v1:flag-for-flag-germany.svg" width="16" height="16"> [German](https://github.com/yauheniya-ai/finanzamt/blob/main/readme/README_de.md)
|
|
74
|
+
|
|
75
|
+
</div>
|
|
76
|
+
|
|
77
|
+
A Python library for extracting structured data from receipts and invoices and preparing essential German VAT statements.
|
|
78
|
+
|
|
79
|
+
## Features
|
|
80
|
+
|
|
81
|
+
- **German Tax Alignment** — Category taxonomy and VAT handling aligned with German fiscal practice (Vorsteuer / Umsatzsteuer, UStVA line numbers)
|
|
82
|
+
- **Local-First** — Everything runs locally and completely offline; no data leaves your machine
|
|
83
|
+
- **4-Agent Pipeline** — Sequential specialised agents for metadata, counterparty, amounts, and line items; short focused prompts for reliable local model performance
|
|
84
|
+
- **Purchases and Sales** — Handles both incoming invoices (Eingangsrechnungen) and outgoing invoices (Ausgangsrechnungen)
|
|
85
|
+
- **Counterparty Deduplication** — Vendors and clients are stored once and reused across receipts
|
|
86
|
+
- **Web UI** — Full browser interface for uploading, reviewing, editing, and managing receipts
|
|
87
|
+
|
|
88
|
+
## Tech Stack
|
|
89
|
+
|
|
90
|
+
- <img src="https://api.iconify.design/devicon:python.svg" width="16" height="16"> Python — package language
|
|
91
|
+
- <img src="https://api.iconify.design/devicon:fastapi.svg" width="16" height="16"> FastAPI — backend for the web UI
|
|
92
|
+
- <img src="https://api.iconify.design/devicon:react.svg" width="16" height="16"> React — interactive frontend
|
|
93
|
+
- <img src="https://api.iconify.design/simple-icons:paddlepaddle.svg" width="16" height="16"> PaddleOCR — OCR for scanned PDFs
|
|
94
|
+
- <img src="https://api.iconify.design/devicon:google.svg" width="16" height="16"> Tesseract — OCR for scanned PDFs and images when PaddleOCR fails or times out
|
|
95
|
+
- <img src="https://api.iconify.design/devicon:ollama.svg" width="16" height="16"> Ollama — local LLMs for structured extraction of information from receipts and invoices
|
|
96
|
+
- <img src="https://api.iconify.design/hugeicons:qwen.svg" width="16" height="16"> Qwen — laptop-compatible LLMs; qwen2.5:7b-instruct-q4_K_M is currently the preferred default for text-based extraction
|
|
97
|
+
- <img src="https://api.iconify.design/devicon:sqlite.svg" width="16" height="16"> SQLite — local database for original receipts and extracted data
|
|
98
|
+
|
|
99
|
+
## Installation
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install finamt
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### System Requirements
|
|
106
|
+
|
|
107
|
+
- Python 3.10+
|
|
108
|
+
- Ollama running locally with a supported model pulled
|
|
109
|
+
- Tesseract OCR (optional fallback when PaddleOCR fails or times out)
|
|
110
|
+
|
|
111
|
+
#### Ollama
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
# Install Ollama
|
|
115
|
+
curl -fsSL https://ollama.ai/install.sh | sh
|
|
116
|
+
|
|
117
|
+
# Pull a model — qwen2.5 7B is the recommended default
|
|
118
|
+
ollama pull qwen2.5:7b-instruct-q4_K_M
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Other models that work well: `qwen3:8b`, `llama3.2`, `llama3.1`.
|
|
122
|
+
|
|
123
|
+
#### Tesseract OCR (optional fallback for PaddleOCR)
|
|
124
|
+
|
|
125
|
+
**Ubuntu / Debian**
|
|
126
|
+
```bash
|
|
127
|
+
sudo apt-get install tesseract-ocr tesseract-ocr-deu
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**macOS**
|
|
131
|
+
```bash
|
|
132
|
+
brew install tesseract tesseract-lang
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Windows**
|
|
136
|
+
|
|
137
|
+
Download the installer from https://github.com/UB-Mannheim/tesseract/wiki, run it, and add the Tesseract installation directory to your `PATH`.
|
|
138
|
+
|
|
139
|
+
## Quick Start
|
|
140
|
+
|
|
141
|
+
### Interactive UI
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
pip install "finamt[ui]"
|
|
145
|
+
finanzamt --ui
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
<p align="center">
|
|
149
|
+
<img src="https://raw.githubusercontent.com/yauheniya-ai/finanzamt/main/docs/images/Demo.webp" width="100%" />
|
|
150
|
+
<em>Interactive UI to upload receipts and manage tax statements</em>
|
|
151
|
+
</p>
|
|
152
|
+
|
|
153
|
+
### Python API
|
|
154
|
+
|
|
155
|
+
#### Process a single receipt (expense)
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from finanzamt import FinanceAgent
|
|
159
|
+
|
|
160
|
+
agent = FinanceAgent()
|
|
161
|
+
result = agent.process_receipt("receipt.pdf")
|
|
162
|
+
|
|
163
|
+
if result.success:
|
|
164
|
+
data = result.data
|
|
165
|
+
print(f"Counterparty: {data.vendor}")
|
|
166
|
+
print(f"Date: {data.receipt_date}")
|
|
167
|
+
print(f"Total: {data.total_amount} EUR")
|
|
168
|
+
print(f"VAT: {data.vat_percentage}% ({data.vat_amount} EUR)")
|
|
169
|
+
print(f"Net: {data.net_amount} EUR")
|
|
170
|
+
print(f"Category: {data.category}")
|
|
171
|
+
print(f"Items: {len(data.items)}")
|
|
172
|
+
|
|
173
|
+
# Serialise to JSON
|
|
174
|
+
with open("extracted.json", "w", encoding="utf-8") as f:
|
|
175
|
+
f.write(data.to_json())
|
|
176
|
+
else:
|
|
177
|
+
print(f"Extraction failed: {result.error_message}")
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
#### Sales invoices (outgoing)
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
result = agent.process_receipt("invoice_to_client.pdf", receipt_type="sale")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
#### Batch processing
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from pathlib import Path
|
|
190
|
+
from finanzamt import FinanceAgent
|
|
191
|
+
|
|
192
|
+
agent = FinanceAgent()
|
|
193
|
+
results = agent.batch_process(list(Path("receipts/").glob("*.pdf")))
|
|
194
|
+
|
|
195
|
+
for path, result in results.items():
|
|
196
|
+
if result.success:
|
|
197
|
+
print(f"{path}: {result.data.total_amount} EUR")
|
|
198
|
+
else:
|
|
199
|
+
print(f"{path}: ERROR — {result.error_message}")
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Configuration
|
|
203
|
+
|
|
204
|
+
Settings are read in priority order from: environment variables → `.env` file → built-in defaults.
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# .env
|
|
208
|
+
|
|
209
|
+
# OCR and general settings
|
|
210
|
+
FINANZAMT_OLLAMA_BASE_URL=http://localhost:11434
|
|
211
|
+
FINANZAMT_OCR_LANGUAGE=german
|
|
212
|
+
FINANZAMT_OCR_TIMEOUT=60
|
|
213
|
+
FINANZAMT_TESSERACT_CMD=tesseract
|
|
214
|
+
FINANZAMT_OCR_PREPROCESS=true
|
|
215
|
+
FINANZAMT_PDF_DPI=150
|
|
216
|
+
|
|
217
|
+
# Extraction agents — all 4 agents use this model
|
|
218
|
+
FINANZAMT_AGENT_MODEL=qwen2.5:7b-instruct-q4_K_M
|
|
219
|
+
FINANZAMT_AGENT_TIMEOUT=60
|
|
220
|
+
FINANZAMT_AGENT_NUM_CTX=4096
|
|
221
|
+
FINANZAMT_AGENT_MAX_RETRIES=2
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
You can also pass config objects directly:
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
from finanzamt import FinanceAgent
|
|
228
|
+
from finanzamt.agents.config import Config, AgentsConfig
|
|
229
|
+
|
|
230
|
+
agent = FinanceAgent(
|
|
231
|
+
config=Config(ocr_language="deu+eng", pdf_dpi=150),
|
|
232
|
+
agents_cfg=AgentsConfig(agent_model="qwen3:8b"),
|
|
233
|
+
)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## API Reference
|
|
237
|
+
|
|
238
|
+
### FinanceAgent
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
class FinanceAgent:
|
|
242
|
+
def __init__(
|
|
243
|
+
self,
|
|
244
|
+
config: Config | None = None,
|
|
245
|
+
db_path: str | Path | None = "~/.finanzamt/finanzamt.db",
|
|
246
|
+
agents_cfg: AgentsConfig | None = None,
|
|
247
|
+
) -> None: ...
|
|
248
|
+
|
|
249
|
+
def process_receipt(
|
|
250
|
+
self,
|
|
251
|
+
pdf_path: str | Path | bytes,
|
|
252
|
+
receipt_type: str = "purchase", # "purchase" or "sale"
|
|
253
|
+
) -> ExtractionResult: ...
|
|
254
|
+
|
|
255
|
+
def batch_process(
|
|
256
|
+
self,
|
|
257
|
+
pdf_paths: list[str | Path],
|
|
258
|
+
receipt_type: str = "purchase",
|
|
259
|
+
) -> dict[str, ExtractionResult]: ...
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### ExtractionResult
|
|
263
|
+
|
|
264
|
+
Always check `success` before accessing `data`, which may be `None`.
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
@dataclass
|
|
268
|
+
class ExtractionResult:
|
|
269
|
+
success: bool
|
|
270
|
+
data: ReceiptData | None
|
|
271
|
+
error_message: str | None
|
|
272
|
+
duplicate: bool # True if already in the database
|
|
273
|
+
existing_id: str | None # ID of the original if duplicate
|
|
274
|
+
processing_time: float | None # seconds
|
|
275
|
+
|
|
276
|
+
def to_dict(self) -> dict: ...
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### ReceiptData
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
@dataclass
|
|
283
|
+
class ReceiptData:
|
|
284
|
+
id: str # SHA-256 of OCR text — stable dedup key
|
|
285
|
+
receipt_type: ReceiptType # "purchase" or "sale"
|
|
286
|
+
counterparty: Counterparty | None # vendor (purchase) or client (sale)
|
|
287
|
+
receipt_number: str | None
|
|
288
|
+
receipt_date: datetime | None
|
|
289
|
+
total_amount: Decimal | None
|
|
290
|
+
vat_percentage: Decimal | None # e.g. Decimal("19.0")
|
|
291
|
+
vat_amount: Decimal | None
|
|
292
|
+
net_amount: Decimal | None # computed: total - vat
|
|
293
|
+
category: ReceiptCategory
|
|
294
|
+
items: list[ReceiptItem]
|
|
295
|
+
vat_splits: list[dict] # for mixed-rate invoices
|
|
296
|
+
|
|
297
|
+
vendor: str | None # alias for counterparty.name
|
|
298
|
+
|
|
299
|
+
def to_dict(self) -> dict: ...
|
|
300
|
+
def to_json(self) -> str: ...
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
### Counterparty
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
@dataclass
|
|
307
|
+
class Counterparty:
|
|
308
|
+
id: str # UUID assigned by the database
|
|
309
|
+
name: str | None
|
|
310
|
+
vat_id: str | None # EU format, e.g. DE123456789
|
|
311
|
+
tax_number: str | None # German Steuernummer, e.g. 123/456/78901
|
|
312
|
+
address: Address
|
|
313
|
+
verified: bool # manually confirmed in the UI
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### ReceiptItem
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
@dataclass
|
|
320
|
+
class ReceiptItem:
|
|
321
|
+
position: int | None
|
|
322
|
+
description: str
|
|
323
|
+
quantity: Decimal | None
|
|
324
|
+
unit_price: Decimal | None
|
|
325
|
+
total_price: Decimal | None
|
|
326
|
+
vat_rate: Decimal | None
|
|
327
|
+
vat_amount: Decimal | None
|
|
328
|
+
category: ReceiptCategory
|
|
329
|
+
|
|
330
|
+
def to_dict(self) -> dict: ...
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
### ReceiptCategory
|
|
334
|
+
|
|
335
|
+
A validated string subclass. Invalid values are silently normalised to `"other"`.
|
|
336
|
+
|
|
337
|
+
```python
|
|
338
|
+
from finanzamt.agents.prompts import RECEIPT_CATEGORIES # list[str]
|
|
339
|
+
from finanzamt.models import ReceiptCategory
|
|
340
|
+
|
|
341
|
+
cat = ReceiptCategory("software") # valid
|
|
342
|
+
cat = ReceiptCategory("unknown_value") # normalised to "other"
|
|
343
|
+
cat = ReceiptCategory.other() # explicit fallback
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Exceptions
|
|
347
|
+
|
|
348
|
+
All exceptions inherit from `FinanceAgentError`.
|
|
349
|
+
|
|
350
|
+
| Exception | Raised when |
|
|
351
|
+
|---|---|
|
|
352
|
+
| `OCRProcessingError` | PDF cannot be opened or text extraction fails |
|
|
353
|
+
| `LLMExtractionError` | Ollama is unreachable or returns invalid JSON after all retries |
|
|
354
|
+
| `InvalidReceiptError` | Extracted data fails business-logic validation |
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
from finanzamt.exceptions import FinanceAgentError, OCRProcessingError
|
|
358
|
+
|
|
359
|
+
try:
|
|
360
|
+
result = agent.process_receipt("scan.pdf")
|
|
361
|
+
except OCRProcessingError as e:
|
|
362
|
+
print(e)
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
## Extraction Pipeline
|
|
366
|
+
|
|
367
|
+
Each receipt goes through four sequential LLM calls, each with a short focused prompt:
|
|
368
|
+
|
|
369
|
+
| Agent | Extracts |
|
|
370
|
+
|---|---|
|
|
371
|
+
| Agent 1 | Receipt number, date, category |
|
|
372
|
+
| Agent 2 | Counterparty name, VAT ID, Steuernummer, address |
|
|
373
|
+
| Agent 3 | Total amount, VAT percentage, VAT amount |
|
|
374
|
+
| Agent 4 | Line items (description, VAT rate, VAT amount, price) |
|
|
375
|
+
|
|
376
|
+
Results are merged in Python — no additional LLM validation step. Debug output for every agent (prompt, raw response, parsed JSON) is saved to `~/.finanzamt/debug/<receipt_id>/`.
|
|
377
|
+
|
|
378
|
+
## Supported Categories
|
|
379
|
+
|
|
380
|
+
| Category | Typical content | Direction |
|
|
381
|
+
|---|---|---|
|
|
382
|
+
| `material` | Paper, office consumables, raw materials | purchase |
|
|
383
|
+
| `equipment` | Hardware, printers, monitors, machines | purchase |
|
|
384
|
+
| `software` | Licences, SaaS subscriptions, cloud services | purchase |
|
|
385
|
+
| `internet` | Hosting, domains, broadband | purchase |
|
|
386
|
+
| `telecommunication` | Mobile contracts, SIM, telephone | purchase |
|
|
387
|
+
| `travel` | Flights, rail, hotels, taxis, car rental | purchase |
|
|
388
|
+
| `education` | Courses, books, certifications, seminars | purchase |
|
|
389
|
+
| `utilities` | Electricity, gas, water, heating | purchase |
|
|
390
|
+
| `insurance` | Liability, health, property insurance | purchase |
|
|
391
|
+
| `taxes` | Tax advisory, filing fees, government charges | purchase |
|
|
392
|
+
| `services` | Freelance / service work billed to a client | sale |
|
|
393
|
+
| `consulting` | Advisory or consulting project billed to a client | sale |
|
|
394
|
+
| `products` | Physical goods sold to a client | sale |
|
|
395
|
+
| `licensing` | Software or IP rights licensed to a client | sale |
|
|
396
|
+
| `other` | Anything that does not match the above | either |
|
|
397
|
+
|
|
398
|
+
## TODO
|
|
399
|
+
|
|
400
|
+
- [x] Receipt parsing
|
|
401
|
+
- [x] Tax calculation engine
|
|
402
|
+
- [ ] ELSTER field mapper
|
|
403
|
+
- [ ] XML generator
|
|
404
|
+
- [ ] XSD validator
|
|
405
|
+
|
|
406
|
+
## Contributing
|
|
407
|
+
|
|
408
|
+
1. Fork the repository
|
|
409
|
+
2. Create a feature branch (`git checkout -b feature/my-change`)
|
|
410
|
+
3. Make your changes
|
|
411
|
+
4. Run the test suite: `pytest --cov=src --cov-report=term-missing`
|
|
412
|
+
5. Submit a pull request
|
|
413
|
+
|
|
414
|
+
## License
|
|
415
|
+
|
|
416
|
+
MIT — see [LICENSE](https://raw.githubusercontent.com/yauheniya-ai/finanzamt/main/LICENSE) for details.
|
|
417
|
+
|
|
418
|
+
## Disclaimer
|
|
419
|
+
|
|
420
|
+
`finanzamt` is an independent open-source project and is not affiliated with,
|
|
421
|
+
endorsed by, or associated with the German tax authorities or ELSTER.
|