fintl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. fintl/__init__.py +0 -0
  2. fintl/accounts_etl/__init__.py +0 -0
  3. fintl/accounts_etl/dkb/__init__.py +0 -0
  4. fintl/accounts_etl/dkb/credit0.py +242 -0
  5. fintl/accounts_etl/dkb/festgeld0.py +245 -0
  6. fintl/accounts_etl/dkb/files.py +230 -0
  7. fintl/accounts_etl/dkb/giro0.py +249 -0
  8. fintl/accounts_etl/dkb/giro202307.py +251 -0
  9. fintl/accounts_etl/dkb/giro202312.py +263 -0
  10. fintl/accounts_etl/dkb/plugin.py +95 -0
  11. fintl/accounts_etl/dkb/tagesgeld0.py +268 -0
  12. fintl/accounts_etl/dkb/tagesgeld202307.py +257 -0
  13. fintl/accounts_etl/dkb/tagesgeld202312.py +261 -0
  14. fintl/accounts_etl/exceptions.py +6 -0
  15. fintl/accounts_etl/file_helper.py +143 -0
  16. fintl/accounts_etl/files.py +62 -0
  17. fintl/accounts_etl/gls/__init__.py +0 -0
  18. fintl/accounts_etl/gls/credit0.py +94 -0
  19. fintl/accounts_etl/gls/giro0.py +91 -0
  20. fintl/accounts_etl/gls/helper.py +204 -0
  21. fintl/accounts_etl/gls/plugin.py +38 -0
  22. fintl/accounts_etl/labels.py +57 -0
  23. fintl/accounts_etl/postbank/__init__.py +0 -0
  24. fintl/accounts_etl/postbank/giro0.py +250 -0
  25. fintl/accounts_etl/postbank/giro202305.py +271 -0
  26. fintl/accounts_etl/postbank/plugin.py +32 -0
  27. fintl/accounts_etl/process_accounts.py +99 -0
  28. fintl/accounts_etl/registry.py +28 -0
  29. fintl/accounts_etl/runner.py +205 -0
  30. fintl/accounts_etl/scalable/__init__.py +0 -0
  31. fintl/accounts_etl/scalable/broker0.py +209 -0
  32. fintl/accounts_etl/scalable/broker20231028.py +165 -0
  33. fintl/accounts_etl/scalable/broker20260309.py +293 -0
  34. fintl/accounts_etl/scalable/files.py +135 -0
  35. fintl/accounts_etl/scalable/plugin.py +47 -0
  36. fintl/accounts_etl/schemas.py +336 -0
  37. fintl/accounts_etl/store.py +169 -0
  38. fintl/accounts_etl/utils.py +149 -0
  39. fintl/cli/README.md +392 -0
  40. fintl/cli/__init__.py +0 -0
  41. fintl/cli/etl.py +23 -0
  42. fintl/cli/main.py +27 -0
  43. fintl/cli/plot.py +46 -0
  44. fintl/cli/search.py +388 -0
  45. fintl/cli/search.tcss +45 -0
  46. fintl/cli/store.py +121 -0
  47. fintl/fine_logging/__init__.py +237 -0
  48. fintl/path_utils.py +20 -0
  49. fintl-0.1.0.dist-info/METADATA +112 -0
  50. fintl-0.1.0.dist-info/RECORD +52 -0
  51. fintl-0.1.0.dist-info/WHEEL +4 -0
  52. fintl-0.1.0.dist-info/entry_points.txt +3 -0
fintl/__init__.py ADDED
File without changes
File without changes
File without changes
@@ -0,0 +1,242 @@
1
+ import datetime
2
+ import logging
3
+ import re
4
+ import typing as T
5
+ from pathlib import Path
6
+
7
+ import polars as pl
8
+
9
+ from fintl.accounts_etl.exceptions import (
10
+ ExtractBalanceException,
11
+ ExtractTransactionsException,
12
+ )
13
+ from fintl.accounts_etl.file_helper import (
14
+ concatenate_new_information_to_history,
15
+ detect_new_raw_files,
16
+ detect_relevant_target_files,
17
+ get_parser_source_files,
18
+ store_balance,
19
+ store_transactions,
20
+ )
21
+ from fintl.accounts_etl.files import copy_new_files, load_lines, select_files_to_copy
22
+ from fintl.accounts_etl.schemas import (
23
+ HASH_COLUMNS,
24
+ TRANSACTION_COLUMNS,
25
+ BalanceInfo,
26
+ Case,
27
+ Config,
28
+ DKBCreditParserEnum,
29
+ ProviderEnum,
30
+ ServiceEnum,
31
+ )
32
+ from fintl.accounts_etl.utils import (
33
+ detect_encoding,
34
+ find_line_with_pattern,
35
+ german_string_numbers_to_floats,
36
+ hash_transactions,
37
+ verify_transactions,
38
+ )
39
+
40
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Provider / service / parser triple identifying what this module handles.
CASE = Case(
    parser=DKBCreditParserEnum.credit0.value,
    service=ServiceEnum.credit.value,
    provider=ProviderEnum.dkb.value,
)
47
+
48
+
49
def check_if_parser_applies(file_path: Path) -> bool:
    """Return whether ``file_path`` is named like an export this parser handles.

    Only the file name is inspected; the file itself is not opened. A matching
    name ends with
    ``<YYYY-MM-DD>_to_<YYYY-MM-DD>_<4 digits>________<4 digits>.csv``.

    Args:
        file_path: Path of the candidate file.

    Returns:
        True if the file name ends with the expected pattern, otherwise False.
    """
    # The dot is escaped so that only a literal ".csv" extension matches;
    # unescaped it would accept any character in that position.
    file_name_pattern = (
        r"(\d{4}-\d{2}-\d{2}_to_\d{4}-\d{2}-\d{2}_\d{4}________\d{4}\.csv)$"
    )
    return re.search(file_name_pattern, str(file_path.name)) is not None
58
+
59
+
60
def extract_transactions(
    case: Case, file_path: Path, lines: T.List[str], encoding: str
) -> pl.DataFrame:
    """Read the transaction table of one export file into a normalized frame.

    The header row is located with ``find_line_with_pattern`` (a line starting
    with ``Umsatz``, optionally quoted). The file is then read from that row on
    as a ``;``-separated CSV with every column as text, after which the date
    and amount columns are converted and the common output columns are added.

    Args:
        case: Provider/service/parser identifiers written into every row.
        file_path: CSV file to read; its string form is stored in ``file``.
        lines: Already loaded lines of the file, used to locate the header.
        encoding: Encoding handed to ``pl.read_csv``.

    Returns:
        The transactions restricted to ``TRANSACTION_COLUMNS``.
    """
    ix_start_transactions, transactions_header = find_line_with_pattern(
        lines,
        pattern='^("?Umsatz)',  # start of transactions
    )
    logger.debug(
        f"{file_path=} has {ix_start_transactions=} and {transactions_header=}"
    )

    # Every column is read as text first; conversions happen explicitly below.
    text_columns = (
        "Umsatz abgerechnet und nicht im Saldo enthalten",
        "Wertstellung",
        "Belegdatum",
        "Beschreibung",
        "Betrag (EUR)",
        "Ursprünglicher Betrag",
    )
    raw = pl.read_csv(
        file_path,
        skip_rows=ix_start_transactions,
        separator=";",
        truncate_ragged_lines=True,
        encoding=encoding,
        schema={name: pl.Utf8 for name in text_columns},
    )

    receipt_date = pl.col("Belegdatum")
    description = pl.col("Beschreibung")
    amount = pl.col("Betrag (EUR)")
    myself = pl.lit("myself")

    transactions = (
        raw.with_columns([receipt_date.str.to_date("%d.%m.%Y")])
        .with_columns(
            amount.str.strip_chars_end().map_elements(
                german_string_numbers_to_floats, return_dtype=pl.Float64
            ),
        )
        .with_columns(
            amount=amount,
            description=description,
            date=receipt_date,
            # Positive amounts take the description as source, negative ones
            # as recipient; the other side is "myself".
            source=pl.when(amount > 0).then(description).otherwise(myself),
            recipient=pl.when(amount < 0).then(description).otherwise(myself),
            provider=pl.lit(case.provider),
            service=pl.lit(case.service),
            parser=pl.lit(case.parser),
            file=pl.lit(str(file_path)),
        )
    )
    transactions = hash_transactions(transactions, hash_columns=HASH_COLUMNS)

    verify_transactions(TRANSACTION_COLUMNS, transactions, file_path)

    return transactions.select(TRANSACTION_COLUMNS)
122
+
123
+
124
def extract_balance(case: Case, file_path: Path, lines: T.List[str]) -> BalanceInfo:
    """Parse balance amount, currency and date from the export's header lines.

    The line matching ``Saldo:`` (optionally quoted) carries ``<amount>
    <currency>`` in its second ``;``-separated field; the line directly after
    it carries the date as ``day.month.year`` in its second field.

    Args:
        case: Provider/service/parser identifiers copied into the result.
        file_path: File the lines came from; stored as string in the result.
        lines: Already loaded lines of the file.

    Returns:
        A ``BalanceInfo`` built from the parsed values.
    """
    ix_start_balance, balance_line = find_line_with_pattern(
        lines,
        pattern='^("?Saldo:)',  # start of balance info
    )

    logger.debug(f"{file_path=} has {ix_start_balance=} and {balance_line=}")

    balance_block = lines[ix_start_balance : ix_start_balance + 2]
    total_line, date_line = balance_block[0], balance_block[1]

    # Date: second field, unquoted, then day.month.year -> datetime.date.
    date_text = date_line.split(";")[1].strip(";").strip('"')
    date_parts = [int(part) for part in date_text.split(".")]
    balance_date = datetime.date(date_parts[2], date_parts[1], date_parts[0])

    # Total: second field, unquoted, split into "<amount> <currency>".
    total_tokens = (
        total_line.split(";")[1].strip(";").strip(":").strip('"').split(" ")
    )
    amount_text, currency = total_tokens[0], total_tokens[1]
    amount = float(amount_text)

    return BalanceInfo(
        date=balance_date,
        amount=amount,
        currency=currency,
        provider=case.provider,
        service=case.service,
        parser=case.parser,
        file=str(file_path),
    )
156
+
157
+
158
def parse_csv_file(case: Case, file_path: Path) -> tuple[pl.DataFrame, BalanceInfo]:
    """Parse one raw export file into its transactions and its balance.

    Args:
        case: Provider/service/parser identifiers passed on to the extractors.
        file_path: The raw CSV file to parse.

    Returns:
        A ``(transactions, balance)`` tuple.

    Raises:
        ExtractTransactionsException: If extracting the transactions fails for
            any reason; the original error is attached as ``__cause__``.
        ExtractBalanceException: If extracting the balance fails for any
            reason; the original error is attached as ``__cause__``.
    """
    encoding = detect_encoding(file_path)
    logger.debug(f"{file_path=} has {encoding=}")

    lines = load_lines(file_path, encoding)

    try:
        transactions = extract_transactions(case, file_path, lines, encoding)
    except Exception as e:
        msg = f"failed to parse {case=} transactions: {file_path=}"
        # logger.exception logs at ERROR level and appends the traceback, so
        # the root cause stays visible even when callers swallow the wrapper.
        logger.exception(msg)
        raise ExtractTransactionsException(msg) from e

    try:
        balance = extract_balance(case, file_path, lines)
    except Exception as e:
        msg = f"failed to parse {case=} balance: {file_path=}"
        logger.exception(msg)
        raise ExtractBalanceException(msg) from e

    return transactions, balance
179
+
180
+
181
def parse_new_files(
    case: Case,
    new_files_to_parse: list[Path],
    parsed_dir: Path,
):
    """Parse each new raw file and store its transactions and balance.

    ``parsed_dir`` is created when it does not exist yet. A file whose parsing
    raises ``ExtractBalanceException`` or ``ExtractTransactionsException`` is
    skipped and the loop moves on to the next file.

    Args:
        case: Provider/service/parser identifiers handed to ``parse_csv_file``.
        new_files_to_parse: Raw files that still need parsing.
        parsed_dir: Directory handed to the store helpers.
    """
    if not new_files_to_parse:
        logger.info("No new files to parse")
        return

    if not parsed_dir.exists():
        logger.info(f"Creating {parsed_dir=}")
        parsed_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"Parsing {len(new_files_to_parse):_} new files to {parsed_dir=}")

    for file_path in new_files_to_parse:
        logger.debug(f"Parsing {file_path=} to {parsed_dir=}")
        try:
            transactions, balance = parse_csv_file(case, file_path)
        except (ExtractBalanceException, ExtractTransactionsException):
            # already logged in parse_csv_file
            continue
        else:
            store_transactions(parsed_dir, file_path, transactions)
            store_balance(parsed_dir, file_path, balance)

    logger.info(f"Finished parsing {len(new_files_to_parse):_d} new files")
207
+
208
+
209
def main(config: Config):
    """Run the copy -> parse -> concatenate pipeline for this module's ``CASE``.

    Args:
        config: Configuration providing the raw, parsed and parser directories
            for ``CASE`` and used to look up the source files.
    """
    logger.info(f"Processing {CASE=}")

    # Scan the source files this parser applies to.
    source_files = get_parser_source_files(CASE, config, check_if_parser_applies)

    # Scan the target (raw) directory, then copy over what is new.
    raw_dir = config.get_raw_dir(CASE)
    target_files = detect_relevant_target_files(raw_dir)
    copy_new_files(raw_dir, select_files_to_copy(source_files, target_files))

    # Detect new raw files and parse them to parquet -> transactions & balance.
    parsed_dir = config.get_parsed_dir(CASE)
    new_files_to_parse = detect_new_raw_files(
        raw_dir, check_if_parser_applies, parsed_dir, CASE.provider, CASE.service
    )
    parse_new_files(CASE, new_files_to_parse, parsed_dir)

    # Extend pre-existing parquets for this parser.
    # NOTE(review): files that parse_new_files skipped on error are still in
    # new_files_to_parse here - confirm the concatenation tolerates that.
    concatenate_new_information_to_history(
        config.get_parser_dir(CASE), parsed_dir, new_files_to_parse
    )

    logger.info(f"Done processing {CASE=}")
@@ -0,0 +1,245 @@
1
+ import logging
2
+ import re
3
+ import typing as T
4
+ from pathlib import Path
5
+
6
+ import polars as pl
7
+
8
+ from fintl.accounts_etl.dkb.giro202307 import extract_balance
9
+ from fintl.accounts_etl.dkb.giro202312 import detect_separator
10
+ from fintl.accounts_etl.exceptions import (
11
+ ExtractBalanceException,
12
+ ExtractTransactionsException,
13
+ )
14
+ from fintl.accounts_etl.file_helper import (
15
+ concatenate_new_information_to_history,
16
+ detect_new_parsed_files,
17
+ detect_new_raw_files,
18
+ detect_relevant_target_files,
19
+ get_parser_source_files,
20
+ store_balance,
21
+ store_transactions,
22
+ )
23
+ from fintl.accounts_etl.files import copy_new_files, load_lines, select_files_to_copy
24
+ from fintl.accounts_etl.schemas import (
25
+ HASH_COLUMNS,
26
+ TRANSACTION_COLUMNS,
27
+ BalanceInfo,
28
+ Case,
29
+ Config,
30
+ DKBFestgeltParserEnum,
31
+ ProviderEnum,
32
+ ServiceEnum,
33
+ )
34
+ from fintl.accounts_etl.utils import (
35
+ detect_encoding,
36
+ find_line_with_pattern,
37
+ german_string_numbers_to_floats,
38
+ hash_transactions,
39
+ verify_transactions,
40
+ )
41
+
42
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Provider / service / parser triple identifying what this module handles.
CASE = Case(
    parser=DKBFestgeltParserEnum.festgeld0.value,
    service=ServiceEnum.festgeld.value,
    provider=ProviderEnum.dkb.value,
)
49
+
50
+
51
def check_if_parser_applies(file_path: Path) -> bool:
    """Return whether this parser should handle ``file_path``.

    Two conditions must hold: the file name ends with ``DE`` followed by 20
    digits and ``.csv``, and ``detect_separator`` reports ``,`` or ``;`` for
    the file's lines.

    Args:
        file_path: Path of the candidate file.

    Returns:
        True if both the file name and the detected separator match.
    """
    is_file_name_match = re.search(r"(DE\d{20}\.csv$)", str(file_path.name)) is not None
    logger.debug(f"{is_file_name_match=}")
    if not is_file_name_match:
        # The result is already decided; do not read and decode files that
        # cannot belong to this parser.
        return False

    # Check that the CSV content uses one of the expected separators.
    encoding = detect_encoding(file_path)
    lines = load_lines(file_path, encoding)

    separator = detect_separator(lines)
    return separator in (",", ";")
62
+
63
+
64
def extract_transactions(
    case: Case, file_path: Path, lines: T.List[str], encoding: str
) -> pl.DataFrame:
    """Read the transaction table of one export file into a normalized frame.

    The header row is located with ``find_line_with_pattern`` (a line starting
    with ``Buchungsdatum``, optionally quoted) and the separator comes from
    ``detect_separator``. All columns are read as text; the booking date and
    the amount are converted afterwards and the common output columns added.

    Args:
        case: Provider/service/parser identifiers written into every row.
        file_path: CSV file to read; its string form is stored in ``file``.
        lines: Already loaded lines of the file, used to locate the header and
            to detect the separator.
        encoding: Encoding handed to ``pl.read_csv``.

    Returns:
        The transactions restricted to ``TRANSACTION_COLUMNS``.

    Raises:
        ValueError: If no separator could be detected.
        pl.exceptions.InvalidOperationError: If a date column cannot be
            converted; the offending values are logged before re-raising.
    """
    transaction_pattern: str = '^("?Buchungsdatum)'  # start of transactions

    date_format: str = "%d.%m.%y"
    date_cols: list = ["Buchungsdatum"]

    ix_start_transactions, transactions_header = find_line_with_pattern(
        lines, pattern=transaction_pattern
    )
    is_empty_1st_line = len(lines[0].strip()) == 0
    logger.debug(
        f"{file_path=} ({is_empty_1st_line=}) has {ix_start_transactions=} and {transactions_header=}"
    )

    schema = {
        "Buchungsdatum": pl.Utf8,
        "Wertstellung": pl.Utf8,
        "Status": pl.Utf8,
        "Zahlungspflichtige*r": pl.Utf8,
        "Zahlungsempfänger*in": pl.Utf8,
        "Verwendungszweck": pl.Utf8,
        "Umsatztyp": pl.Utf8,
        "IBAN": pl.Utf8,
        "Betrag (€)": pl.Utf8,
        "Gläubiger-ID": pl.Utf8,
        "Mandatsreferenz": pl.Utf8,
        "Kundenreferenz": pl.Utf8,
    }
    separator = detect_separator(lines)
    if separator is None:
        raise ValueError(
            f"{separator=} but it is not allowed to be None in the following."
        )

    transactions = pl.read_csv(
        file_path,
        # One row fewer is skipped when the first line is blank.
        # NOTE(review): presumably the CSV reader does not count that blank
        # line - confirm against the polars version in use.
        skip_rows=ix_start_transactions - 1
        if is_empty_1st_line
        else ix_start_transactions,
        separator=separator,
        truncate_ragged_lines=True,
        encoding=encoding,
        schema=schema,
    )

    try:
        transactions = transactions.with_columns(
            [pl.col(col).str.to_date(date_format) for col in date_cols],
        )
    except pl.exceptions.InvalidOperationError:
        logger.error(f"{separator=}")
        logger.error(f"{len(transactions)=:_}")
        logger.error(f"{transactions[date_cols[0]].to_list()=}")
        msg = f"{file_path=}: Failed to convert dates for values in one of the columns:"
        # Retry value by value to report exactly which entries do not parse.
        for col in date_cols:
            for v in transactions[col].unique():
                s = pl.Series([v])
                try:
                    s.str.to_date(date_format)
                except Exception:
                    # Narrowed from a bare ``except`` so KeyboardInterrupt and
                    # SystemExit are no longer swallowed by this diagnostic.
                    msg += f"\ncolumn '{col}' failed for value '{v}'"
        logger.error(msg)
        raise

    transactions = transactions.with_columns(
        pl.col("Betrag (€)")
        .str.replace("€", "")
        .str.strip_chars_end()
        .map_elements(german_string_numbers_to_floats, return_dtype=pl.Float64),
    )
    transactions = transactions.with_columns(
        amount=pl.col("Betrag (€)"),
        description=pl.col("Verwendungszweck"),
        date=pl.col("Buchungsdatum"),
        # Positive amounts take the payer column as source, negative ones the
        # payee column as recipient; the other side is "myself".
        source=pl.when(pl.col("Betrag (€)") > 0)
        .then(pl.col("Zahlungspflichtige*r"))
        .otherwise(pl.lit("myself")),
        recipient=pl.when(pl.col("Betrag (€)") < 0)
        .then(pl.col("Zahlungsempfänger*in"))
        .otherwise(pl.lit("myself")),
        provider=pl.lit(case.provider),
        service=pl.lit(case.service),
        parser=pl.lit(case.parser),
        file=pl.lit(str(file_path)),
    )
    transactions = hash_transactions(transactions, HASH_COLUMNS)

    verify_transactions(TRANSACTION_COLUMNS, transactions, file_path)

    transactions = transactions.select(TRANSACTION_COLUMNS)

    return transactions
158
+
159
+
160
def parse_csv_file(case: Case, file_path: Path) -> tuple[pl.DataFrame, BalanceInfo]:
    """Parse one raw export file into its transactions and its balance.

    Args:
        case: Provider/service/parser identifiers passed on to the extractors.
        file_path: The raw CSV file to parse.

    Returns:
        A ``(transactions, balance)`` tuple.

    Raises:
        ExtractTransactionsException: If extracting the transactions fails for
            any reason; the original error is attached as ``__cause__``.
        ExtractBalanceException: If extracting the balance fails for any
            reason; the original error is attached as ``__cause__``.
    """
    encoding = detect_encoding(file_path)
    logger.debug(f"{file_path=} has {encoding=}")

    lines = load_lines(file_path, encoding)

    try:
        transactions = extract_transactions(case, file_path, lines, encoding)
    except Exception as e:
        msg = f"failed to parse {case=} transactions: {file_path=}"
        # logger.exception logs at ERROR level and appends the traceback, so
        # the root cause stays visible even when callers swallow the wrapper.
        logger.exception(msg)
        raise ExtractTransactionsException(msg) from e

    try:
        balance = extract_balance(case, file_path, lines)
    except Exception as e:
        msg = f"failed to parse {case=} balance: {file_path=}"
        logger.exception(msg)
        raise ExtractBalanceException(msg) from e

    return transactions, balance
181
+
182
+
183
def parse_new_files(
    case: Case,
    new_files_to_parse: list[Path],
    parsed_dir: Path,
):
    """Parse each new raw file and store its transactions and balance.

    ``parsed_dir`` is created when it does not exist yet. A file whose parsing
    raises ``ExtractBalanceException`` or ``ExtractTransactionsException`` is
    skipped and the loop moves on to the next file.

    Args:
        case: Provider/service/parser identifiers handed to ``parse_csv_file``.
        new_files_to_parse: Raw files that still need parsing.
        parsed_dir: Directory handed to the store helpers.
    """
    if not new_files_to_parse:
        logger.info("No new files to parse")
        return

    if not parsed_dir.exists():
        logger.info(f"Creating {parsed_dir=}")
        parsed_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"Parsing {len(new_files_to_parse):_} new files to {parsed_dir=}")

    for file_path in new_files_to_parse:
        logger.debug(f"Parsing {file_path=} to {parsed_dir=}")
        try:
            transactions, balance = parse_csv_file(case, file_path)
        except (ExtractBalanceException, ExtractTransactionsException):
            # already logged in parse_csv_file
            continue
        else:
            store_transactions(parsed_dir, file_path, transactions)
            store_balance(parsed_dir, file_path, balance)

    logger.info(f"Finished parsing {len(new_files_to_parse):_d} new files")
209
+
210
+
211
def main(config: Config):
    """Run the copy -> parse -> concatenate pipeline for this module's ``CASE``.

    Args:
        config: Configuration providing the raw, parsed and parser directories
            for ``CASE`` and used to look up the source files.
    """
    logger.info(f"Processing {CASE=}")

    # Scan the source files this parser applies to.
    source_files = get_parser_source_files(CASE, config, check_if_parser_applies)

    # Scan the target (raw) directory, then copy over what is new.
    raw_dir = config.get_raw_dir(CASE)
    target_files = detect_relevant_target_files(raw_dir)
    copy_new_files(raw_dir, select_files_to_copy(source_files, target_files))

    # Detect new raw files and parse them to parquet -> transactions & balance.
    parsed_dir = config.get_parsed_dir(CASE)
    new_files_to_parse = detect_new_raw_files(
        raw_dir, check_if_parser_applies, parsed_dir, CASE.provider, CASE.service
    )
    parse_new_files(CASE, new_files_to_parse, parsed_dir)

    # Extend pre-existing parquets for this parser with the new parsed files.
    parser_dir = config.get_parser_dir(CASE)
    concatenate_new_information_to_history(
        parser_dir,
        parsed_dir,
        detect_new_parsed_files(raw_dir, parser_dir, parsed_dir),
    )

    logger.info(f"Done processing {CASE=}")