beanhub-import 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Launch Platform
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.1
2
+ Name: beanhub-import
3
+ Version: 0.0.1
4
+ Summary: The simple library for import extracted transactions provided by beanhub-extract and generate corresponding Beancount transactions based on predefined rules
5
+ License: MIT
6
+ Author: Fang-Pen Lin
7
+ Author-email: fangpen@launchplatform.com
8
+ Requires-Python: >=3.9,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Dist: beancount-black (>=1.0.2,<2.0.0)
16
+ Requires-Dist: beancount-parser (>=1.1.0,<2.0.0)
17
+ Requires-Dist: beanhub-extract (>=0.0.5,<0.0.6)
18
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
19
+ Requires-Dist: pydantic (>=2.7.1,<3.0.0)
20
+ Requires-Dist: pytz (>=2024.1,<2025.0)
21
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
22
+ Description-Content-Type: text/markdown
23
+
24
+ # beanhub-import
25
+ The simple library for import extracted transactions provided by beanhub-extract and generate corresponding Beancount transactions based on predefined rules
26
+
@@ -0,0 +1,2 @@
1
+ # beanhub-import
2
+ A simple library for importing extracted transactions provided by beanhub-extract and generating the corresponding Beancount transactions based on predefined rules
File without changes
@@ -0,0 +1 @@
1
# Default metadata key under which the import id of a transaction is stored.
IMPORT_ID_KEY = "import-id"
@@ -0,0 +1,158 @@
1
+ import dataclasses
2
+ import enum
3
+ import pathlib
4
+ import typing
5
+
6
+ import pydantic
7
+ from pydantic import BaseModel
8
+
9
+
10
class ImportBaseModel(BaseModel):
    """Common pydantic base class for the import configuration models."""
12
+
13
+
14
class StrRegexMatch(ImportBaseModel):
    """Match rule carrying a regular expression."""

    # regular expression pattern to match against
    regex: str
16
+
17
+
18
class StrExactMatch(ImportBaseModel):
    """Match rule satisfied only by a value equal to ``equals``."""

    # the exact string the value has to be equal to
    equals: str
20
+
21
+
22
class StrPrefixMatch(ImportBaseModel):
    """Match rule satisfied by a value starting with ``prefix``."""

    # the string the value has to start with
    prefix: str
24
+
25
+
26
class StrSuffixMatch(ImportBaseModel):
    """Match rule satisfied by a value ending with ``suffix``."""

    # the string the value has to end with
    suffix: str
28
+
29
+
30
class StrContainsMatch(ImportBaseModel):
    """Match rule satisfied by a value containing ``contains``."""

    # the substring the value has to contain
    contains: str
32
+
33
+
34
# Pattern accepted for matching a string attribute: either a bare string or one
# of the structured match models.
# `typing.Union` is used rather than the `X | Y` operator because this expression
# is evaluated at import time and `X | Y` on classes requires Python 3.10, while
# the package declares support for Python 3.9.
StrMatch = typing.Union[
    str, StrPrefixMatch, StrSuffixMatch, StrExactMatch, StrContainsMatch
]
35
+
36
+
37
class SimpleTxnMatchRule(ImportBaseModel):
    """Optional per-attribute patterns used to match an extracted transaction.

    Every field is optional and defaults to ``None``. All fields are declared
    as ``Optional`` so that an explicit ``null`` in the configuration validates
    the same way for every attribute, and ``typing.Optional`` is used instead
    of ``X | None`` so the annotations can be evaluated on Python 3.9.
    """

    extractor: typing.Optional[StrMatch] = None
    file: typing.Optional[StrMatch] = None
    date: typing.Optional[StrMatch] = None
    post_date: typing.Optional[StrMatch] = None
    timezone: typing.Optional[StrMatch] = None
    desc: typing.Optional[StrMatch] = None
    bank_desc: typing.Optional[StrMatch] = None
    currency: typing.Optional[StrMatch] = None
    category: typing.Optional[StrMatch] = None
    status: typing.Optional[StrMatch] = None
    type: typing.Optional[StrMatch] = None
    source_account: typing.Optional[StrMatch] = None
    dest_account: typing.Optional[StrMatch] = None
    note: typing.Optional[StrMatch] = None
    reference: typing.Optional[StrMatch] = None
    payee: typing.Optional[StrMatch] = None


# the only kind of transaction match rule available for now
TxnMatchRule = SimpleTxnMatchRule
57
+
58
+
59
@enum.unique
class ActionType(str, enum.Enum):
    """Kinds of action an import rule can carry (string-valued enum)."""

    # add a transaction generated from a template
    add_txn = "add_txn"
62
+
63
+
64
class PostingTemplate(ImportBaseModel):
    """Template of a posting; every field is an optional template string.

    ``typing.Optional`` is used instead of ``X | None`` because pydantic
    evaluates the annotations at class creation and the package declares
    support for Python 3.9.
    """

    # account of the posting
    account: typing.Optional[str] = None
    # amount of the posting
    amount: typing.Optional[str] = None
    # currency of the posting
    currency: typing.Optional[str] = None
    # TODO: support cost / price and etc
72
+
73
+
74
class TransactionTemplate(ImportBaseModel):
    """Template of a transaction; every field is optional.

    ``typing.Optional`` is used instead of ``X | None`` because pydantic
    evaluates the annotations at class creation and the package declares
    support for Python 3.9.
    """

    # the import-id for de-duplication
    id: typing.Optional[str] = None
    date: typing.Optional[str] = None
    flag: typing.Optional[str] = None
    narration: typing.Optional[str] = None
    payee: typing.Optional[str] = None
    postings: typing.Optional[list[PostingTemplate]] = None
82
+
83
+
84
class GeneratedPosting(ImportBaseModel):
    """A posting produced from a ``PostingTemplate``; only the account is required.

    ``typing.Optional`` is used instead of ``X | None`` because pydantic
    evaluates the annotations at class creation and the package declares
    support for Python 3.9.
    """

    # account of the posting
    account: str
    # amount of the posting
    amount: typing.Optional[str] = None
    # currency of the posting
    currency: typing.Optional[str] = None
    # TODO: support cost / price and etc
92
+
93
+
94
class GeneratedTransaction(ImportBaseModel):
    """A transaction produced by applying an import rule to an extracted one.

    ``typing.Optional`` is used instead of ``X | None`` because pydantic
    evaluates the annotations at class creation and the package declares
    support for Python 3.9.
    """

    # path of the file the transaction belongs to
    file: str
    # the import-id for de-duplication
    id: str
    date: str
    flag: str
    narration: str
    payee: typing.Optional[str] = None
    postings: list[GeneratedPosting]
103
+
104
+
105
class ActionAddTxn(ImportBaseModel):
    """Action adding a transaction, built from a template, to a file."""

    # kind of the action, fixed to `ActionType.add_txn`
    type: typing.Literal[ActionType.add_txn] = pydantic.Field(ActionType.add_txn)
    # file the generated transaction goes to
    file: str
    # template of the transaction to generate
    txn: TransactionTemplate


# the only kind of action available for now
Action = ActionAddTxn
112
+
113
+
114
# Pattern accepted for matching a file path: a bare string, an exact match or
# a regular expression match.
# `typing.Union` is used rather than the `X | Y` operator because this expression
# is evaluated at import time and `X | Y` on classes requires Python 3.10, while
# the package declares support for Python 3.9.
SimpleFileMatch = typing.Union[str, StrExactMatch, StrRegexMatch]
115
+
116
+
117
class InputConfigDetails(ImportBaseModel):
    """Settings applied to the transactions extracted from a matched input file.

    ``typing.Optional`` is used instead of ``X | None`` because pydantic
    evaluates the annotations at class creation and the package declares
    support for Python 3.9.
    """

    # name of the extractor to use for the input file
    extractor: typing.Optional[str] = None
    # posting templates placed before the postings of a generated transaction
    prepend_postings: typing.Optional[list[PostingTemplate]] = None
    # posting templates placed after the postings of a generated transaction
    appending_postings: typing.Optional[list[PostingTemplate]] = None
    # fallback template values for generated transactions
    default_txn: typing.Optional[TransactionTemplate] = None
122
+
123
+
124
class InputConfig(ImportBaseModel):
    """Pairs a file match pattern with the settings for the matched files."""

    # pattern selecting the input files
    match: SimpleFileMatch
    # settings used for the selected files
    config: InputConfigDetails
127
+
128
+
129
class OutputConfig(ImportBaseModel):
    """Configuration entry for output files, selected by a file match pattern."""

    # pattern selecting the output files
    match: SimpleFileMatch
131
+
132
+
133
class ImportRule(ImportBaseModel):
    """A transaction match rule together with the actions to run on a match."""

    # conditions an extracted transaction has to satisfy
    match: TxnMatchRule
    # actions performed for a matching transaction
    actions: list[Action]
136
+
137
+
138
class ImportDoc(ImportBaseModel):
    """Root model of an import configuration document.

    ``typing.Optional`` is used instead of ``X | None`` because pydantic
    evaluates the annotations at class creation and the package declares
    support for Python 3.9.
    """

    # input file configurations
    inputs: list[InputConfig]
    # import rules applied to the extracted transactions
    imports: list[ImportRule]
    # optional output file configurations
    outputs: typing.Optional[list[OutputConfig]] = None
142
+
143
+
144
@dataclasses.dataclass(frozen=True)
class ImportedTransaction:
    """Immutable record of a transaction carrying an import id in a file."""

    # file in which the transaction was found
    file: pathlib.Path
    # line number of the transaction in that file
    lineno: int
    # import id of the transaction
    id: str
149
+
150
+
151
@dataclasses.dataclass(frozen=True)
class ChangeSet:
    """Changes to apply to a single file: removals, updates and additions."""

    # list of imported transaction to remove
    remove: list[ImportedTransaction]
    # map from the line number of an existing transaction to the generated
    # transaction replacing it
    update: dict[int, GeneratedTransaction]
    # list of generated transaction to add
    add: list[GeneratedTransaction]
@@ -0,0 +1,174 @@
1
+ import collections
2
+ import copy
3
+ import itertools
4
+ import json
5
+ import pathlib
6
+ import typing
7
+
8
+ from beancount_parser.data_types import Entry
9
+ from beancount_parser.data_types import EntryType
10
+ from beancount_parser.helpers import collect_entries
11
+ from beancount_parser.parser import make_parser
12
+ from beancount_parser.parser import traverse
13
+ from lark import Lark
14
+ from lark import Tree
15
+
16
+ from . import constants
17
+ from .data_types import ChangeSet
18
+ from .data_types import GeneratedPosting
19
+ from .data_types import GeneratedTransaction
20
+ from .data_types import ImportedTransaction
21
+
22
+
23
def extract_imported_transactions(
    parser: Lark, bean_file: pathlib.Path, import_id_key: str = constants.IMPORT_ID_KEY
) -> typing.Generator[ImportedTransaction, None, None]:
    """Yield the transactions carrying an import id found from ``bean_file``.

    Every tree produced by ``traverse`` is scanned statement by statement. A
    metadata item whose key is ``import_id_key`` and which follows a
    transaction directive yields an ``ImportedTransaction`` pointing at the
    line of that transaction.

    :param parser: parser handed over to ``traverse``
    :param bean_file: file to start the traversal from
    :param import_id_key: metadata key holding the import id
    :raises ValueError: if a tree is not rooted at ``start`` or one of its
        children is not a ``statement``
    """
    for bean_path, tree in traverse(parser=parser, bean_file=bean_file):
        if tree.data != "start":
            raise ValueError("Expected start")
        # a transaction never spans files, so start each file without one
        last_txn = None
        for child in tree.children:
            if child is None:
                continue
            if child.data != "statement":
                raise ValueError("Expected statement")
            first_child = child.children[0]
            if not isinstance(first_child, Tree):
                continue
            if first_child.data == "date_directive":
                date_directive = first_child.children[0]
                # metadata following a non-transaction directive belongs to
                # that directive and must not be attributed to an earlier
                # transaction
                is_txn = date_directive.data.value == "txn"
                last_txn = date_directive if is_txn else None
            elif first_child.data == "metadata_item":
                if last_txn is None:
                    continue
                if first_child.children[0].value != import_id_key:
                    # Only the import id value gets decoded; other metadata
                    # values are not guaranteed to be JSON-decodable strings.
                    continue
                yield ImportedTransaction(
                    file=bean_path,
                    lineno=last_txn.meta.line,
                    id=json.loads(first_child.children[1].value),
                )
51
+
52
+
53
def compute_changes(
    generated_txns: list[GeneratedTransaction], imported_txns: list[ImportedTransaction]
) -> dict[pathlib.Path, ChangeSet]:
    """Work out, per file, what to remove, update and add.

    Transactions are paired by id. An imported transaction whose generated
    counterpart targets another file is removed from its current file. A
    generated transaction is an update when an imported transaction with the
    same id lives in the same file, otherwise it is an addition.

    :return: map from file path to the ``ChangeSet`` for that file
    """
    generated_by_id = {generated.id: generated for generated in generated_txns}
    imported_by_id = {imported.id: imported for imported in imported_txns}

    to_remove = collections.defaultdict(list)
    for imported in imported_txns:
        counterpart = generated_by_id.get(imported.id)
        if counterpart is None:
            continue
        if imported.file == pathlib.Path(counterpart.file):
            continue
        # the generated txn now goes to another file, drop the old copy
        to_remove[imported.file].append(imported)

    to_add = collections.defaultdict(list)
    to_update = collections.defaultdict(dict)
    for generated in generated_txns:
        target_file = pathlib.Path(generated.file)
        existing = imported_by_id.get(generated.id)
        if existing is None or existing.file != target_file:
            to_add[target_file].append(generated)
        else:
            to_update[target_file][existing.lineno] = generated

    touched_files = frozenset(to_remove).union(to_add).union(to_update)
    return {
        path: ChangeSet(
            remove=to_remove[path],
            add=to_add[path],
            update=to_update[path],
        )
        for path in touched_files
    }
85
+
86
+
87
def to_parser_entry(parser: Lark, text: str) -> Entry:
    """Parse ``text`` (stripped) and return the single entry it contains.

    :raises ValueError: if the parsed text does not hold exactly one entry
    """
    parsed_entries, _tail_comments = collect_entries(parser.parse(text.strip()))
    if len(parsed_entries) == 1:
        return parsed_entries[0]
    raise ValueError("Expected exactly only one entry")
93
+
94
+
95
def posting_to_text(posting: "GeneratedPosting") -> str:
    """Render a generated posting as one posting line indented by two spaces.

    ``amount`` and ``currency`` are optional on ``GeneratedPosting``; parts
    that are ``None`` are left out, since ``str.join`` would raise a
    ``TypeError`` on them.
    """
    columns = (posting.account, posting.amount, posting.currency)
    return (" " * 2) + " ".join(column for column in columns if column is not None)
97
+
98
+
99
def txn_to_text(
    txn: GeneratedTransaction, import_id_key: str = constants.IMPORT_ID_KEY
) -> str:
    """Render a generated transaction as text.

    The first line holds the date, the flag, the JSON-quoted payee (only when
    there is one) and the JSON-quoted narration. It is followed by the import
    id metadata line and one line per posting.
    """
    header_columns = [txn.date, txn.flag]
    if txn.payee is not None:
        header_columns.append(json.dumps(txn.payee))
    header_columns.append(json.dumps(txn.narration))

    text_lines = [
        " ".join(header_columns),
        f" {import_id_key}: {json.dumps(txn.id)}",
    ]
    text_lines.extend(posting_to_text(posting) for posting in txn.postings)
    return "\n".join(text_lines)
116
+
117
+
118
def _entry_children(entry: Entry, comments: list) -> list:
    """Flatten an entry into tree children, led by the given comments."""
    children = list(comments)
    children.append(entry.statement)
    for metadata in entry.metadata:
        children.extend(metadata.comments)
        children.append(metadata.statement)
    for posting in entry.postings:
        children.extend(posting.comments)
        children.append(posting.statement)
        for metadata in posting.metadata:
            children.extend(metadata.comments)
            children.append(metadata.statement)
    return children


def apply_change_set(
    tree: Tree, change_set: ChangeSet, import_id_key: str = constants.IMPORT_ID_KEY
) -> Tree:
    """Return a copy of ``tree`` with the given change set applied.

    Existing entries whose line is listed for removal are dropped along with
    their comments. Entries whose line has an update are replaced by the
    generated transaction while keeping the comments of the entry they
    replace. Added transactions are appended after the existing entries,
    followed by the trailing comments of the original tree. The input tree is
    left untouched.

    :param tree: parsed tree rooted at ``start``
    :param change_set: changes to apply
    :param import_id_key: metadata key used for the import id of both the
        updated and the added transactions
    :raises ValueError: if ``tree`` is not rooted at ``start``
    """
    if tree.data != "start":
        raise ValueError("expected start as the root rule")
    parser = make_parser()

    lines_to_remove = frozenset(txn.lineno for txn in change_set.remove)
    line_to_entries = {
        lineno: to_parser_entry(parser, txn_to_text(txn, import_id_key=import_id_key))
        for lineno, txn in change_set.update.items()
    }
    entries_to_add = [
        to_parser_entry(parser, txn_to_text(txn, import_id_key=import_id_key))
        for txn in change_set.add
    ]

    new_tree = copy.deepcopy(tree)
    entries, tail_comments = collect_entries(new_tree)

    new_children = []
    for entry in entries:
        if entry.type == EntryType.COMMENTS:
            new_children.extend(entry.comments)
            continue
        lineno = entry.statement.meta.line
        if lineno in lines_to_remove:
            # We also drop the comments
            continue
        actual_entry = line_to_entries.get(lineno, entry)
        # use comments from existing entry regardless
        new_children.extend(_entry_children(actual_entry, entry.comments))

    # Added entries are parsed from standalone text, so their line numbers are
    # unrelated to the lines of the file; they must not go through the
    # remove / update lookups above.
    for entry in entries_to_add:
        new_children.extend(_entry_children(entry, entry.comments))

    if tail_comments:
        new_children.extend(tail_comments)

    new_tree.children = new_children
    return new_tree
@@ -0,0 +1,197 @@
1
+ import dataclasses
2
+ import logging
3
+ import os
4
+ import pathlib
5
+ import re
6
+ import typing
7
+
8
+ from beanhub_extract.data_types import Transaction
9
+ from beanhub_extract.extractors import ALL_EXTRACTORS
10
+ from beanhub_extract.utils import strip_txn_base_path
11
+ from jinja2.sandbox import SandboxedEnvironment
12
+
13
+ from .data_types import ActionType
14
+ from .data_types import GeneratedPosting
15
+ from .data_types import GeneratedTransaction
16
+ from .data_types import ImportDoc
17
+ from .data_types import ImportRule
18
+ from .data_types import InputConfigDetails
19
+ from .data_types import PostingTemplate
20
+ from .data_types import SimpleFileMatch
21
+ from .data_types import SimpleTxnMatchRule
22
+ from .data_types import StrContainsMatch
23
+ from .data_types import StrExactMatch
24
+ from .data_types import StrMatch
25
+ from .data_types import StrPrefixMatch
26
+ from .data_types import StrRegexMatch
27
+ from .data_types import StrSuffixMatch
28
+
29
+
30
# Fallback templates for the fields of a generated transaction, used when no
# other template value is provided for a field.
DEFAULT_TXN_TEMPLATE = dict(
    id="{{ file }}:{{ lineno }}",
    date="{{ date }}",
    flag="*",
    # The render context is built with `dataclasses.asdict`, so `desc` is
    # defined even when it is None. The `default` filter only replaces such a
    # value when its second argument is true; without it a missing description
    # renders as the text "None" instead of falling back to `bank_desc`.
    narration="{{ desc | default(bank_desc, true) }}",
)
36
+
37
+
38
def walk_dir_files(
    target_dir: pathlib.Path,
) -> typing.Generator[pathlib.Path, None, None]:
    """Yield the path of every file found under ``target_dir``, recursively."""
    for dirpath, _dirnames, filenames in os.walk(target_dir):
        parent = pathlib.Path(dirpath)
        yield from (parent / filename for filename in filenames)
44
+
45
+
46
def match_file(pattern: "SimpleFileMatch", filepath: pathlib.PurePath) -> bool:
    """Tell whether ``filepath`` satisfies the file match ``pattern``.

    A bare string is handed to ``PurePath.match``, a ``StrRegexMatch`` is
    applied with ``re.match`` to the path as a string, and a ``StrExactMatch``
    compares the path as a string for equality.

    ``filepath`` is annotated as ``pathlib.PurePath`` only: ``pathlib.Path`` is
    a subclass of it, and the ``X | Y`` form is evaluated when the function is
    defined, which fails on Python 3.9.

    :raises ValueError: if ``pattern`` is of an unexpected type
    """
    if isinstance(pattern, str):
        return filepath.match(pattern)
    if isinstance(pattern, StrRegexMatch):
        return re.match(pattern.regex, str(filepath)) is not None
    if isinstance(pattern, StrExactMatch):
        return str(filepath) == pattern.equals
    raise ValueError(f"Unexpected file match type {type(pattern)}")
57
+
58
+
59
def match_str(pattern: "StrMatch", value: typing.Optional[str]) -> bool:
    """Tell whether ``value`` satisfies the string match ``pattern``.

    A ``None`` value never matches. A bare string pattern is treated as a
    regular expression applied with ``re.match``; the structured patterns
    check equality, prefix, suffix or containment.

    ``typing.Optional`` is used instead of ``str | None`` because the
    annotation is evaluated when the function is defined, which fails on
    Python 3.9.

    :raises ValueError: if ``pattern`` is of an unexpected type
    """
    if value is None:
        return False
    if isinstance(pattern, str):
        return re.match(pattern, value) is not None
    if isinstance(pattern, StrExactMatch):
        return value == pattern.equals
    if isinstance(pattern, StrPrefixMatch):
        return value.startswith(pattern.prefix)
    if isinstance(pattern, StrSuffixMatch):
        return value.endswith(pattern.suffix)
    if isinstance(pattern, StrContainsMatch):
        return pattern.contains in value
    raise ValueError(f"Unexpected str match type {type(pattern)}")
74
+
75
+
76
def match_transaction(txn: Transaction, rule: SimpleTxnMatchRule) -> bool:
    """Tell whether ``txn`` satisfies every pattern set on ``rule``.

    Fields of the rule left as ``None`` are skipped, so a rule without any
    pattern matches every transaction. The fields are read directly from the
    model instead of through the deprecated ``BaseModel.dict()``, which also
    avoids serialising the nested pattern models only to discard the result.
    """
    for key in type(rule).model_fields:
        pattern = getattr(rule, key)
        if pattern is None:
            continue
        if not match_str(pattern, getattr(txn, key)):
            return False
    return True
82
+
83
+
84
def first_non_none(*values):
    """Return the first argument that is not ``None``, or ``None`` if there is none."""
    for candidate in values:
        if candidate is not None:
            return candidate
    return None
86
+
87
+
88
def process_transaction(
    template_env: SandboxedEnvironment,
    input_config: InputConfigDetails,
    import_rules: list[ImportRule],
    txn: Transaction,
    default_import_id: typing.Optional[str] = None,
) -> typing.Generator[GeneratedTransaction, None, None]:
    """Generate transactions for one extracted transaction from the import rules.

    Rules are tried in order; the actions of the first rule whose ``match``
    accepts ``txn`` are applied and the remaining rules are ignored. Each
    template value is taken from the action's template, then from the input's
    ``default_txn``, then from ``DEFAULT_TXN_TEMPLATE``; for ``id``,
    ``default_import_id`` is tried before the built-in default. Templates are
    rendered with the fields of ``txn`` as context.

    ``typing.Optional`` is used instead of ``str | None`` because the
    annotations are evaluated when the functions are defined, which fails on
    Python 3.9.

    :raises ValueError: if a matched rule holds an unsupported action type
    """
    txn_ctx = dataclasses.asdict(txn)
    default_txn = input_config.default_txn

    def render_str(value: typing.Optional[str]) -> typing.Optional[str]:
        if value is None:
            return None
        return template_env.from_string(value).render(**txn_ctx)

    for import_rule in import_rules:
        if not match_transaction(txn, import_rule.match):
            continue
        for action in import_rule.actions:
            if action.type != ActionType.add_txn:
                # we only support add txn for now
                raise ValueError(f"Unsupported action type {action.type}")

            template_values = {
                key: first_non_none(
                    getattr(action.txn, key),
                    getattr(default_txn, key) if default_txn is not None else None,
                    DEFAULT_TXN_TEMPLATE.get(key),
                )
                for key in ("date", "flag", "narration", "payee")
            }
            template_values["id"] = first_non_none(
                getattr(action.txn, "id"),
                getattr(default_txn, "id") if default_txn is not None else None,
                default_import_id,
                DEFAULT_TXN_TEMPLATE["id"],
            )

            posting_templates: list[PostingTemplate] = []
            if input_config.prepend_postings is not None:
                posting_templates.extend(input_config.prepend_postings)
            if action.txn.postings is not None:
                posting_templates.extend(action.txn.postings)
            elif default_txn is not None and default_txn.postings is not None:
                posting_templates.extend(default_txn.postings)
            if input_config.appending_postings is not None:
                posting_templates.extend(input_config.appending_postings)

            generated_postings = [
                GeneratedPosting(
                    account=render_str(posting_template.account),
                    amount=render_str(posting_template.amount),
                    currency=render_str(posting_template.currency),
                )
                for posting_template in posting_templates
            ]

            yield GeneratedTransaction(
                file=render_str(action.file),
                postings=generated_postings,
                **{key: render_str(value) for key, value in template_values.items()},
            )
        # stop at the first matching rule
        break
152
+
153
+
154
def process_imports(
    import_doc: ImportDoc,
    input_dir: pathlib.Path,
) -> typing.Generator[GeneratedTransaction, None, None]:
    """Generate transactions for every input file found under ``input_dir``.

    Each file is checked against the input configurations in order. The first
    configuration that matches the file and names a known extractor is used to
    extract its transactions, which are then run through the import rules; the
    remaining configurations are not tried for that file. A configuration
    without an extractor, or naming an unknown one, is skipped with a warning.

    :param import_doc: import configuration holding the inputs and the rules
    :param input_dir: directory to scan for input files
    """
    logger = logging.getLogger(__name__)
    template_env = SandboxedEnvironment()
    for filepath in walk_dir_files(input_dir):
        for input_config in import_doc.inputs:
            if not match_file(input_config.match, filepath):
                continue
            rel_filepath = filepath.relative_to(input_dir)
            extractor_name = input_config.config.extractor
            if extractor_name is None:
                # TODO: identify input file automatically
                # Without a name there is no extractor class to use; skip
                # explicitly instead of hitting an unbound (or stale, from a
                # previous file) `extractor_cls` below.
                logger.warning(
                    "No extractor specified for file %s, skip", rel_filepath
                )
                continue
            extractor_cls = ALL_EXTRACTORS.get(extractor_name)
            if extractor_cls is None:
                logger.warning(
                    "Extractor %s not found for file %s, skip",
                    extractor_name,
                    rel_filepath,
                )
                continue
            logger.info(
                "Processing file %s with extractor %s", rel_filepath, extractor_name
            )
            with filepath.open("rt") as fo:
                extractor = extractor_cls(fo)
                for transaction in extractor():
                    txn = strip_txn_base_path(input_dir, transaction)
                    yield from process_transaction(
                        template_env=template_env,
                        input_config=input_config.config,
                        import_rules=import_doc.imports,
                        default_import_id=getattr(extractor, "DEFAULT_IMPORT_ID", None),
                        txn=txn,
                    )
            # the file has been handled, do not try the other input configs
            break
@@ -0,0 +1,27 @@
1
+ [tool.poetry]
2
+ name = "beanhub-import"
3
+ version = "0.0.1"
4
+ description = "A simple library for importing extracted transactions provided by beanhub-extract and generating the corresponding Beancount transactions based on predefined rules"
5
+ authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.9"
11
+ pytz = "^2024.1"
12
+ beanhub-extract = "^0.0.5"
13
+ pydantic = "^2.7.1"
14
+ pyyaml = "^6.0.1"
15
+ jinja2 = "^3.1.3"
16
+ beancount-black = "^1.0.2"
17
+ beancount-parser = "^1.1.0"
18
+
19
+ [tool.poetry.dev-dependencies]
20
+
21
+ [tool.poetry.group.dev.dependencies]
22
+ pytest = "^7.4.1"
23
+ pytest-mock = "^3.11.1"
24
+
25
+ [build-system]
26
+ requires = ["poetry-core>=1.0.0"]
27
+ build-backend = "poetry.core.masonry.api"