fintl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fintl-0.1.0/PKG-INFO +112 -0
- fintl-0.1.0/README.md +78 -0
- fintl-0.1.0/pyproject.toml +63 -0
- fintl-0.1.0/src/fintl/__init__.py +0 -0
- fintl-0.1.0/src/fintl/accounts_etl/__init__.py +0 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/__init__.py +0 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/credit0.py +242 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/festgeld0.py +245 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/files.py +230 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/giro0.py +249 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/giro202307.py +251 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/giro202312.py +263 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/plugin.py +95 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/tagesgeld0.py +268 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/tagesgeld202307.py +257 -0
- fintl-0.1.0/src/fintl/accounts_etl/dkb/tagesgeld202312.py +261 -0
- fintl-0.1.0/src/fintl/accounts_etl/exceptions.py +6 -0
- fintl-0.1.0/src/fintl/accounts_etl/file_helper.py +143 -0
- fintl-0.1.0/src/fintl/accounts_etl/files.py +62 -0
- fintl-0.1.0/src/fintl/accounts_etl/gls/__init__.py +0 -0
- fintl-0.1.0/src/fintl/accounts_etl/gls/credit0.py +94 -0
- fintl-0.1.0/src/fintl/accounts_etl/gls/giro0.py +91 -0
- fintl-0.1.0/src/fintl/accounts_etl/gls/helper.py +204 -0
- fintl-0.1.0/src/fintl/accounts_etl/gls/plugin.py +38 -0
- fintl-0.1.0/src/fintl/accounts_etl/labels.py +57 -0
- fintl-0.1.0/src/fintl/accounts_etl/postbank/__init__.py +0 -0
- fintl-0.1.0/src/fintl/accounts_etl/postbank/giro0.py +250 -0
- fintl-0.1.0/src/fintl/accounts_etl/postbank/giro202305.py +271 -0
- fintl-0.1.0/src/fintl/accounts_etl/postbank/plugin.py +32 -0
- fintl-0.1.0/src/fintl/accounts_etl/process_accounts.py +99 -0
- fintl-0.1.0/src/fintl/accounts_etl/registry.py +28 -0
- fintl-0.1.0/src/fintl/accounts_etl/runner.py +205 -0
- fintl-0.1.0/src/fintl/accounts_etl/scalable/__init__.py +0 -0
- fintl-0.1.0/src/fintl/accounts_etl/scalable/broker0.py +209 -0
- fintl-0.1.0/src/fintl/accounts_etl/scalable/broker20231028.py +165 -0
- fintl-0.1.0/src/fintl/accounts_etl/scalable/broker20260309.py +293 -0
- fintl-0.1.0/src/fintl/accounts_etl/scalable/files.py +135 -0
- fintl-0.1.0/src/fintl/accounts_etl/scalable/plugin.py +47 -0
- fintl-0.1.0/src/fintl/accounts_etl/schemas.py +336 -0
- fintl-0.1.0/src/fintl/accounts_etl/store.py +169 -0
- fintl-0.1.0/src/fintl/accounts_etl/utils.py +149 -0
- fintl-0.1.0/src/fintl/cli/README.md +392 -0
- fintl-0.1.0/src/fintl/cli/__init__.py +0 -0
- fintl-0.1.0/src/fintl/cli/etl.py +23 -0
- fintl-0.1.0/src/fintl/cli/main.py +27 -0
- fintl-0.1.0/src/fintl/cli/plot.py +46 -0
- fintl-0.1.0/src/fintl/cli/search.py +388 -0
- fintl-0.1.0/src/fintl/cli/search.tcss +45 -0
- fintl-0.1.0/src/fintl/cli/store.py +121 -0
- fintl-0.1.0/src/fintl/fine_logging/__init__.py +237 -0
- fintl-0.1.0/src/fintl/path_utils.py +20 -0
fintl-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: fintl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Financial ETL CLI: parse, normalize, and explore your bank transaction data.
|
|
5
|
+
Keywords: finance,etl,banking,transactions,cli
|
|
6
|
+
Author: eschmidt42
|
|
7
|
+
Author-email: eschmidt42 <11818904+eschmidt42@users.noreply.github.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
17
|
+
Requires-Dist: altair>=6.0.0
|
|
18
|
+
Requires-Dist: beautifulsoup4>=4.14.3
|
|
19
|
+
Requires-Dist: chardet>=7.3.0
|
|
20
|
+
Requires-Dist: httpx>=0.28.1
|
|
21
|
+
Requires-Dist: instructor>=1.14.5
|
|
22
|
+
Requires-Dist: polars>=1.39.3
|
|
23
|
+
Requires-Dist: pydantic>=2.12.5
|
|
24
|
+
Requires-Dist: pydantic-settings>=2.13.1
|
|
25
|
+
Requires-Dist: python-dateutil>=2.9.0.post0
|
|
26
|
+
Requires-Dist: rich>=14.3.3
|
|
27
|
+
Requires-Dist: textual>=8.1.1
|
|
28
|
+
Requires-Dist: xlsxwriter>=3.2.9
|
|
29
|
+
Requires-Python: >=3.13
|
|
30
|
+
Project-URL: Homepage, https://github.com/eschmidt42/fintl
|
|
31
|
+
Project-URL: Repository, https://github.com/eschmidt42/fintl
|
|
32
|
+
Project-URL: Issues, https://github.com/eschmidt42/fintl/issues
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# `fintl`
|
|
36
|
+
|
|
37
|
+
[](https://github.com/eschmidt42/fintl/actions/workflows/ci.yml)
|
|
38
|
+
[](https://codecov.io/gh/eschmidt42/fintl)
|
|
39
|
+
[](https://opensource.org/licenses/MIT)
|
|
40
|
+
[](https://www.python.org/)
|
|
41
|
+
[](https://github.com/astral-sh/ruff)
|
|
42
|
+
[](https://github.com/astral-sh/uv)
|
|
43
|
+
[](https://github.com/astral-sh/ty)
|
|
44
|
+
|
|
45
|
+
> Financial ETL CLI: parse, normalize, and explore your bank transaction data.
|
|
46
|
+
|
|
47
|
+
## TL;DR
|
|
48
|
+
|
|
49
|
+
This tool helps you process, visualize and search your balance and transaction information that you have exported from your bank accounts.
|
|
50
|
+
|
|
51
|
+
Currently supports DKB, Postbank, GLS and Scalable Capital Broker.
|
|
52
|
+
|
|
53
|
+
Supported file formats: CSV, HTML, and PNG. PNG parsing uses a local [ollama](https://ollama.com) instance with a multimodal model — opt-in via `fintl.toml` (required only for Scalable broker PNG statements; gracefully skipped when not configured).
|
|
54
|
+
|
|
55
|
+
**All your data stays on your machine. No need to trust another entity that is PSD2 certified.**
|
|
56
|
+
|
|
57
|
+
## How to install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
git clone https://github.com/eschmidt42/fintl.git
|
|
61
|
+
cd fintl
|
|
62
|
+
uv sync
|
|
63
|
+
uv tool install .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
After installation, `fintl` should be available on your `PATH`:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
which fintl
|
|
70
|
+
# e.g. /Users/YOURUSER/.local/bin/fintl
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## How to use
|
|
74
|
+
|
|
75
|
+
1. Configure your `~/.config/petprojects/fintl.toml`. For details see [here](./src/fintl/cli/README.md#fintltoml).
|
|
76
|
+
2. Go to your bank account.
|
|
77
|
+
3. Select your service, e.g. Giro.
|
|
78
|
+
4. Export a CSV file (or similar) to `~/Downloads`, or directly to the source dir for your bank / service.
|
|
79
|
+
5. Optionally, if you've stored your file in `~/Downloads`, run `cd ~/Downloads` followed by `fintl store` (uses your `fintl.toml` from step 1).
|
|
80
|
+
6. Optionally, if you want to process PNG screenshots via Ollama, start Ollama.
|
|
81
|
+
7. Run the etl via `fintl etl` (also uses your `fintl.toml` from step 1).
|
|
82
|
+
8. Upon success visualize / search your data via `fintl plot` or `fintl search`.
|
|
83
|
+
|
|
84
|
+
[Please see here and below](./src/fintl/cli/README.md#top-level-usage) for more usage details.
|
|
85
|
+
|
|
86
|
+
## Repo structure
|
|
87
|
+
|
|
88
|
+
* `src/fintl/accounts_etl/` — core ETL logic: schemas, parsers, registry, runner
|
|
89
|
+
* `src/fintl/cli/` — CLI entry point and subcommands (`etl`, `store`, `search`, `plot`)
|
|
90
|
+
* `tests/` — tests for packages of this repo
|
|
91
|
+
|
|
92
|
+
## Development
|
|
93
|
+
|
|
94
|
+
Run tests:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
uv run pytest -n auto tests
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Type check:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
uv run ty check src
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Lint, format, type check, test and all the other good stuff:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
pre-commit run --all-files
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
For release steps see [here](./docs/releases.md).
|
fintl-0.1.0/README.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# `fintl`
|
|
2
|
+
|
|
3
|
+
[](https://github.com/eschmidt42/fintl/actions/workflows/ci.yml)
|
|
4
|
+
[](https://codecov.io/gh/eschmidt42/fintl)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://www.python.org/)
|
|
7
|
+
[](https://github.com/astral-sh/ruff)
|
|
8
|
+
[](https://github.com/astral-sh/uv)
|
|
9
|
+
[](https://github.com/astral-sh/ty)
|
|
10
|
+
|
|
11
|
+
> Financial ETL CLI: parse, normalize, and explore your bank transaction data.
|
|
12
|
+
|
|
13
|
+
## TL;DR
|
|
14
|
+
|
|
15
|
+
This tool helps you process, visualize and search your balance and transaction information that you have exported from your bank accounts.
|
|
16
|
+
|
|
17
|
+
Currently supports DKB, Postbank, GLS and Scalable Capital Broker.
|
|
18
|
+
|
|
19
|
+
Supported file formats: CSV, HTML, and PNG. PNG parsing uses a local [ollama](https://ollama.com) instance with a multimodal model — opt-in via `fintl.toml` (required only for Scalable broker PNG statements; gracefully skipped when not configured).
|
|
20
|
+
|
|
21
|
+
**All your data stays on your machine. No need to trust another entity that is PSD2 certified.**
|
|
22
|
+
|
|
23
|
+
## How to install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/eschmidt42/fintl.git
|
|
27
|
+
cd fintl
|
|
28
|
+
uv sync
|
|
29
|
+
uv tool install .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
After installation, `fintl` should be available on your `PATH`:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
which fintl
|
|
36
|
+
# e.g. /Users/YOURUSER/.local/bin/fintl
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## How to use
|
|
40
|
+
|
|
41
|
+
1. Configure your `~/.config/petprojects/fintl.toml`. For details see [here](./src/fintl/cli/README.md#fintltoml).
|
|
42
|
+
2. Go to your bank account.
|
|
43
|
+
3. Select your service, e.g. Giro.
|
|
44
|
+
4. Export a CSV file (or similar) to `~/Downloads`, or directly to the source dir for your bank / service.
|
|
45
|
+
5. Optionally, if you've stored your file in `~/Downloads`, run `cd ~/Downloads` followed by `fintl store` (uses your `fintl.toml` from step 1).
|
|
46
|
+
6. Optionally, if you want to process PNG screenshots via Ollama, start Ollama.
|
|
47
|
+
7. Run the etl via `fintl etl` (also uses your `fintl.toml` from step 1).
|
|
48
|
+
8. Upon success visualize / search your data via `fintl plot` or `fintl search`.
|
|
49
|
+
|
|
50
|
+
[Please see here and below](./src/fintl/cli/README.md#top-level-usage) for more usage details.
|
|
51
|
+
|
|
52
|
+
## Repo structure
|
|
53
|
+
|
|
54
|
+
* `src/fintl/accounts_etl/` — core ETL logic: schemas, parsers, registry, runner
|
|
55
|
+
* `src/fintl/cli/` — CLI entry point and subcommands (`etl`, `store`, `search`, `plot`)
|
|
56
|
+
* `tests/` — tests for packages of this repo
|
|
57
|
+
|
|
58
|
+
## Development
|
|
59
|
+
|
|
60
|
+
Run tests:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
uv run pytest -n auto tests
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Type check:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
uv run ty check src
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Lint, format, type check, test and all the other good stuff:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pre-commit run --all-files
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
For release steps see [here](./docs/releases.md).
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "fintl"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Financial ETL CLI: parse, normalize, and explore your bank transaction data."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
keywords = ["finance", "etl", "banking", "transactions", "cli"]
|
|
8
|
+
classifiers = [
|
|
9
|
+
"Development Status :: 3 - Alpha",
|
|
10
|
+
"Environment :: Console",
|
|
11
|
+
"Intended Audience :: End Users/Desktop",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Operating System :: OS Independent",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.13",
|
|
16
|
+
"Topic :: Office/Business :: Financial",
|
|
17
|
+
]
|
|
18
|
+
authors = [
|
|
19
|
+
{ name = "eschmidt42", email = "11818904+eschmidt42@users.noreply.github.com" }
|
|
20
|
+
]
|
|
21
|
+
requires-python = ">=3.13"
|
|
22
|
+
dependencies = [
|
|
23
|
+
"altair>=6.0.0",
|
|
24
|
+
"beautifulsoup4>=4.14.3",
|
|
25
|
+
"chardet>=7.3.0",
|
|
26
|
+
"httpx>=0.28.1",
|
|
27
|
+
"instructor>=1.14.5",
|
|
28
|
+
"polars>=1.39.3",
|
|
29
|
+
"pydantic>=2.12.5",
|
|
30
|
+
"pydantic-settings>=2.13.1",
|
|
31
|
+
"python-dateutil>=2.9.0.post0",
|
|
32
|
+
"rich>=14.3.3",
|
|
33
|
+
"textual>=8.1.1",
|
|
34
|
+
"xlsxwriter>=3.2.9",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/eschmidt42/fintl"
|
|
39
|
+
Repository = "https://github.com/eschmidt42/fintl"
|
|
40
|
+
Issues = "https://github.com/eschmidt42/fintl/issues"
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
fintl = "fintl.cli.main:app"
|
|
44
|
+
|
|
45
|
+
[build-system]
|
|
46
|
+
requires = ["uv_build>=0.6.6,<0.11"]
|
|
47
|
+
build-backend = "uv_build"
|
|
48
|
+
|
|
49
|
+
[dependency-groups]
|
|
50
|
+
dev = [
|
|
51
|
+
"inline-snapshot>=0.32.5",
|
|
52
|
+
"notebook>=7.5.5",
|
|
53
|
+
"pre-commit>=4.5.1",
|
|
54
|
+
"pytest>=9.0.2",
|
|
55
|
+
"pytest-cov>=7.1.0",
|
|
56
|
+
"pytest-xdist>=3.8.0",
|
|
57
|
+
"ruff>=0.15.7",
|
|
58
|
+
"ty>=0.0.25",
|
|
59
|
+
"vulture>=2.16",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
[tool.inline-snapshot]
|
|
63
|
+
format-command="ruff check --fix-only --stdin-filename {filename} | ruff format --stdin-filename {filename}"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
import typing as T
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from fintl.accounts_etl.exceptions import (
|
|
10
|
+
ExtractBalanceException,
|
|
11
|
+
ExtractTransactionsException,
|
|
12
|
+
)
|
|
13
|
+
from fintl.accounts_etl.file_helper import (
|
|
14
|
+
concatenate_new_information_to_history,
|
|
15
|
+
detect_new_raw_files,
|
|
16
|
+
detect_relevant_target_files,
|
|
17
|
+
get_parser_source_files,
|
|
18
|
+
store_balance,
|
|
19
|
+
store_transactions,
|
|
20
|
+
)
|
|
21
|
+
from fintl.accounts_etl.files import copy_new_files, load_lines, select_files_to_copy
|
|
22
|
+
from fintl.accounts_etl.schemas import (
|
|
23
|
+
HASH_COLUMNS,
|
|
24
|
+
TRANSACTION_COLUMNS,
|
|
25
|
+
BalanceInfo,
|
|
26
|
+
Case,
|
|
27
|
+
Config,
|
|
28
|
+
DKBCreditParserEnum,
|
|
29
|
+
ProviderEnum,
|
|
30
|
+
ServiceEnum,
|
|
31
|
+
)
|
|
32
|
+
from fintl.accounts_etl.utils import (
|
|
33
|
+
detect_encoding,
|
|
34
|
+
find_line_with_pattern,
|
|
35
|
+
german_string_numbers_to_floats,
|
|
36
|
+
hash_transactions,
|
|
37
|
+
verify_transactions,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# The provider / service / parser combination handled by this module.
# It is passed to the config lookups and stamped onto every parsed record.
CASE = Case(
    parser=DKBCreditParserEnum.credit0.value,
    service=ServiceEnum.credit.value,
    provider=ProviderEnum.dkb.value,
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def check_if_parser_applies(file_path: Path) -> bool:
    """Return whether ``file_path`` is named like an export this parser handles.

    Only the file name is inspected; the file content is never read. The name
    must end with
    ``<YYYY-MM-DD>_to_<YYYY-MM-DD>_<4 digits>________<4 digits>.csv``.

    Args:
        file_path: Path of the candidate file.

    Returns:
        ``True`` if the file name matches the pattern, ``False`` otherwise.
    """
    # The dot before "csv" is escaped so that only a literal ".csv" extension
    # matches; an unescaped "." would accept any character in that position.
    is_file_name_match = (
        re.search(
            r"(\d{4}-\d{2}-\d{2}_to_\d{4}-\d{2}-\d{2}_\d{4}________\d{4}\.csv)$",
            str(file_path.name),
        )
        is not None
    )
    return is_file_name_match
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_transactions(
    case: Case, file_path: Path, lines: T.List[str], encoding: str
) -> pl.DataFrame:
    """Read the transaction table of a CSV export into the common column layout.

    The header row is located by searching ``lines`` for the first line that
    starts with ``Umsatz`` (optionally preceded by a double quote). The file is
    then re-read with polars, skipping everything before that row.

    Args:
        case: Provider / service / parser triple written onto every row.
        file_path: CSV file to read; also stored in the ``file`` column.
        lines: The already loaded lines of ``file_path``, used only to locate
            the header row.
        encoding: Encoding passed to ``pl.read_csv``.

    Returns:
        A frame restricted to ``TRANSACTION_COLUMNS``.
    """
    ix_start_transactions, transactions_header = find_line_with_pattern(
        lines, pattern='^("?Umsatz)'
    )
    logger.debug(
        f"{file_path=} has {ix_start_transactions=} and {transactions_header=}"
    )

    # Every raw column is read as text; conversions happen explicitly below.
    raw_columns = (
        "Umsatz abgerechnet und nicht im Saldo enthalten",
        "Wertstellung",
        "Belegdatum",
        "Beschreibung",
        "Betrag (EUR)",
        "Ursprünglicher Betrag",
    )
    frame = pl.read_csv(
        file_path,
        skip_rows=ix_start_transactions,
        separator=";",
        truncate_ragged_lines=True,
        encoding=encoding,
        schema=dict.fromkeys(raw_columns, pl.Utf8),
    )

    # Step 1: parse the receipt date. Step 2: turn the amount text into floats.
    receipt_date = pl.col("Belegdatum").str.to_date("%d.%m.%Y")
    numeric_amount = (
        pl.col("Betrag (EUR)")
        .str.strip_chars_end()
        .map_elements(german_string_numbers_to_floats, return_dtype=pl.Float64)
    )
    frame = frame.with_columns([receipt_date]).with_columns(numeric_amount)

    # Step 3: derive the normalized columns. The description is used as the
    # source for positive amounts and as the recipient for negative amounts;
    # the other side (and both sides for a zero amount) is "myself".
    amount_col = pl.col("Betrag (EUR)")
    description_col = pl.col("Beschreibung")
    myself = pl.lit("myself")
    frame = frame.with_columns(
        amount=amount_col,
        description=description_col,
        date=pl.col("Belegdatum"),
        source=pl.when(amount_col > 0).then(description_col).otherwise(myself),
        recipient=pl.when(amount_col < 0).then(description_col).otherwise(myself),
        provider=pl.lit(case.provider),
        service=pl.lit(case.service),
        parser=pl.lit(case.parser),
        file=pl.lit(str(file_path)),
    )

    frame = hash_transactions(frame, hash_columns=HASH_COLUMNS)
    verify_transactions(TRANSACTION_COLUMNS, frame, file_path)

    return frame.select(TRANSACTION_COLUMNS)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def extract_balance(case: Case, file_path: Path, lines: T.List[str]) -> BalanceInfo:
    """Build the balance record from the ``Saldo:`` line and the line after it.

    The first line starting with ``Saldo:`` (optionally preceded by a double
    quote) carries the total; the line directly after it carries the date.
    In both lines the second ``;``-separated field is used.

    Args:
        case: Provider / service / parser triple copied onto the result.
        file_path: File the lines were loaded from; stored as ``file``.
        lines: The loaded lines of the export.

    Returns:
        A ``BalanceInfo`` with date, amount and currency.
    """
    ix_start_balance, balance_line = find_line_with_pattern(
        lines, pattern='^("?Saldo:)'
    )

    logger.debug(f"{file_path=} has {ix_start_balance=} and {balance_line=}")

    balance_block = lines[ix_start_balance : ix_start_balance + 2]
    total_line, date_line = balance_block[0], balance_block[1]

    # Date field is "DD.MM.YYYY" once separators and quotes are stripped.
    date_field = date_line.split(";")[1].strip(";").strip('"')
    date_parts = [int(part) for part in date_field.split(".")]
    balance_date = datetime.date(date_parts[2], date_parts[1], date_parts[0])

    # Total field is "<amount> <currency>" once separators and quotes are stripped.
    total_field = total_line.split(";")[1].strip(";").strip(":").strip('"')
    total_parts = total_field.split(" ")
    raw_amount, currency = total_parts[0], total_parts[1]
    amount = float(raw_amount)

    return BalanceInfo(
        date=balance_date,
        amount=amount,
        currency=currency,
        provider=case.provider,
        service=case.service,
        parser=case.parser,
        file=str(file_path),
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def parse_csv_file(case: Case, file_path: Path) -> tuple[pl.DataFrame, BalanceInfo]:
    """Parse one CSV export into its transactions and its balance.

    Args:
        case: Provider / service / parser triple handed to both extractors.
        file_path: CSV file to parse.

    Returns:
        The transactions frame and the balance record, in that order.

    Raises:
        ExtractTransactionsException: If extracting the transactions raised.
        ExtractBalanceException: If extracting the balance raised.
    """
    encoding = detect_encoding(file_path)
    logger.debug(f"{file_path=} has {encoding=}")

    file_lines = load_lines(file_path, encoding)

    # Each stage logs its failure and re-raises it as a stage-specific
    # exception, keeping the original error as the cause.
    try:
        transactions = extract_transactions(case, file_path, file_lines, encoding)
    except Exception as error:
        failure = f"failed to parse {case=} transactions: {file_path=}"
        logger.error(failure)
        raise ExtractTransactionsException(failure) from error

    try:
        balance = extract_balance(case, file_path, file_lines)
    except Exception as error:
        failure = f"failed to parse {case=} balance: {file_path=}"
        logger.error(failure)
        raise ExtractBalanceException(failure) from error

    return transactions, balance
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def parse_new_files(
    case: Case,
    new_files_to_parse: list[Path],
    parsed_dir: Path,
):
    """Parse each new raw file and store its transactions and balance.

    Files that fail to parse are skipped; the remaining files are still
    processed.

    Args:
        case: Provider / service / parser triple handed to the file parser.
        new_files_to_parse: Raw files that have not been parsed yet.
        parsed_dir: Directory the parsed output is stored in; created when
            it does not exist.
    """
    if not new_files_to_parse:
        logger.info("No new files to parse")
        return

    if not parsed_dir.exists():
        logger.info(f"Creating {parsed_dir=}")
        parsed_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"Parsing {len(new_files_to_parse):_} new files to {parsed_dir=}")

    for file_path in new_files_to_parse:
        logger.debug(f"Parsing {file_path=} to {parsed_dir=}")
        try:
            transactions, balance = parse_csv_file(case, file_path)
        except (ExtractBalanceException, ExtractTransactionsException):
            # The failure was already logged inside parse_csv_file.
            continue
        else:
            store_transactions(parsed_dir, file_path, transactions)
            store_balance(parsed_dir, file_path, balance)

    logger.info(f"Finished parsing {len(new_files_to_parse):_d} new files")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def main(config: Config):
    """Run the full pipeline for this module's ``CASE``.

    The steps are: scan the source files, copy the ones not yet present in
    the raw directory, detect raw files that still need parsing, parse them,
    and extend the parser's existing history with the new results.

    Args:
        config: Configuration that provides the source files and the raw,
            parsed and parser directories for ``CASE``.
    """
    logger.info(f"Processing {CASE=}")

    # Source side: files that this parser recognizes by name.
    source_files = get_parser_source_files(CASE, config, check_if_parser_applies)

    # Target side: what is already present in the raw directory.
    raw_dir = config.get_raw_dir(CASE)
    target_files = detect_relevant_target_files(raw_dir)

    # Copy over only the source files selected as new.
    files_to_copy = select_files_to_copy(source_files, target_files)
    copy_new_files(raw_dir, files_to_copy)

    # Raw files that have not been parsed yet.
    parsed_dir = config.get_parsed_dir(CASE)
    files_to_parse = detect_new_raw_files(
        raw_dir, check_if_parser_applies, parsed_dir, CASE.provider, CASE.service
    )

    # Parse new files to parquet -> transactions & balance.
    parse_new_files(CASE, files_to_parse, parsed_dir)

    # Extend the pre-existing parquets for this parser.
    parser_dir = config.get_parser_dir(CASE)
    concatenate_new_information_to_history(parser_dir, parsed_dir, files_to_parse)

    logger.info(f"Done processing {CASE=}")
|