PyPI - beanhub-import - Versions diffs - 0.0.1__tar.gz - Mend

beanhub-import 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

beanhub_import-0.0.1/LICENSE +21 -0
beanhub_import-0.0.1/PKG-INFO +26 -0
beanhub_import-0.0.1/README.md +2 -0
beanhub_import-0.0.1/beanhub_import/__init__.py +0 -0
beanhub_import-0.0.1/beanhub_import/constants.py +1 -0
beanhub_import-0.0.1/beanhub_import/data_types.py +158 -0
beanhub_import-0.0.1/beanhub_import/post_processor.py +174 -0
beanhub_import-0.0.1/beanhub_import/processor.py +197 -0
beanhub_import-0.0.1/pyproject.toml +27 -0

beanhub_import-0.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Launch Platform
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

beanhub_import-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,26 @@
+Metadata-Version: 2.1
+Name: beanhub-import
+Version: 0.0.1
+Summary: The simple library for import extracted transactions provided by beanhub-extract and generate corresponding Beancount transactions based on predefined rules
+License: MIT
+Author: Fang-Pen Lin
+Author-email: fangpen@launchplatform.com
+Requires-Python: >=3.9,<4.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: beancount-black (>=1.0.2,<2.0.0)
+Requires-Dist: beancount-parser (>=1.1.0,<2.0.0)
+Requires-Dist: beanhub-extract (>=0.0.5,<0.0.6)
+Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
+Requires-Dist: pydantic (>=2.7.1,<3.0.0)
+Requires-Dist: pytz (>=2024.1,<2025.0)
+Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
+Description-Content-Type: text/markdown
+# beanhub-import
+A simple library for importing the transactions extracted by beanhub-extract and generating the corresponding Beancount transactions based on predefined rules

beanhub_import-0.0.1/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # beanhub-import
2	+ A simple library for importing the transactions extracted by beanhub-extract and generating the corresponding Beancount transactions based on predefined rules

beanhub_import-0.0.1/beanhub_import/__init__.py ADDED Viewed

File without changes

beanhub_import-0.0.1/beanhub_import/constants.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ IMPORT_ID_KEY = "import-id"

beanhub_import-0.0.1/beanhub_import/data_types.py ADDED Viewed

@@ -0,0 +1,158 @@
+import dataclasses
+import enum
+import pathlib
+import typing
+import pydantic
+from pydantic import BaseModel
+class ImportBaseModel(BaseModel):
+    """Shared pydantic base class that every model in this module derives from."""
+    pass
+class StrRegexMatch(ImportBaseModel):
+    """Matcher holding a regular expression; processor.match_file applies it to the file path with re.match."""
+    regex: str
+class StrExactMatch(ImportBaseModel):
+    """Matcher that succeeds only when the value is exactly equal to `equals`."""
+    equals: str
+class StrPrefixMatch(ImportBaseModel):
+    """Matcher that succeeds when the value starts with `prefix`."""
+    prefix: str
+class StrSuffixMatch(ImportBaseModel):
+    """Matcher that succeeds when the value ends with `suffix`."""
+    suffix: str
+class StrContainsMatch(ImportBaseModel):
+    """Matcher that succeeds when `contains` occurs anywhere in the value."""
+    contains: str
+# Matchers accepted for transaction fields. A bare str is interpreted by
+# processor.match_str as a regular expression applied with re.match.
+# NOTE: StrRegexMatch is not part of this union; it is only accepted by SimpleFileMatch.
+StrMatch = str | StrPrefixMatch | StrSuffixMatch | StrExactMatch | StrContainsMatch
+class SimpleTxnMatchRule(ImportBaseModel):
+    """Per-field matchers applied to an extracted transaction.
+    Each attribute is matched against the transaction attribute of the same
+    name (see processor.match_transaction). Attributes left as None are not
+    checked; every attribute that is set has to match.
+    """
+    extractor: StrMatch | None = None
+    file: StrMatch | None = None
+    date: StrMatch | None = None
+    post_date: StrMatch | None = None
+    # These three were declared as `StrMatch = None`; they are optional like
+    # every other field, so an explicit None must validate as well.
+    timezone: StrMatch | None = None
+    desc: StrMatch | None = None
+    bank_desc: StrMatch | None = None
+    currency: StrMatch | None = None
+    category: StrMatch | None = None
+    status: StrMatch | None = None
+    type: StrMatch | None = None
+    source_account: StrMatch | None = None
+    dest_account: StrMatch | None = None
+    note: StrMatch | None = None
+    reference: StrMatch | None = None
+    payee: StrMatch | None = None
+# Only the simple rule form exists so far; the alias leaves room for more rule kinds.
+TxnMatchRule = SimpleTxnMatchRule
+@enum.unique
+class ActionType(str, enum.Enum):
+    """Kinds of action an import rule can trigger; str-valued so members compare equal to their plain string value."""
+    # emit a new transaction built from a TransactionTemplate
+    add_txn = "add_txn"
+class PostingTemplate(ImportBaseModel):
+    """Template for one posting; processor.process_transaction renders each non-None field as a Jinja2 template against the extracted transaction's fields."""
+    # account of the posting
+    account: str | None = None
+    # amount of the posting
+    amount: str | None = None
+    # currency of the posting
+    currency: str | None = None
+    # TODO: support cost / price and etc
+class TransactionTemplate(ImportBaseModel):
+    """Template for a generated transaction.
+    Every field is optional. For a field left as None,
+    processor.process_transaction falls back to the input's `default_txn`
+    and then to processor.DEFAULT_TXN_TEMPLATE before rendering the value as
+    a Jinja2 template.
+    """
+    # the import-id for de-duplication
+    id: str | None = None
+    date: str | None = None
+    flag: str | None = None
+    narration: str | None = None
+    payee: str | None = None
+    # when None, the postings of the input's `default_txn` (if any) are used instead
+    postings: list[PostingTemplate] | None = None
+class GeneratedPosting(ImportBaseModel):
+    """A posting whose template fields have already been rendered to text."""
+    # account of the posting
+    account: str
+    # amount of the posting
+    amount: str | None = None
+    # currency of the posting
+    currency: str | None = None
+    # TODO: support cost / price and etc
+class GeneratedTransaction(ImportBaseModel):
+    """A fully rendered transaction ready to be written to a Beancount file."""
+    # path of the file the transaction belongs to; post_processor.compute_changes
+    # wraps it in pathlib.Path to group changes per file
+    file: str
+    # the import-id for de-duplication
+    id: str
+    date: str
+    flag: str
+    narration: str
+    # omitted from the generated transaction line when None
+    payee: str | None = None
+    postings: list[GeneratedPosting]
+class ActionAddTxn(ImportBaseModel):
+    """Action that adds one transaction, built from `txn`, to the file named by `file`."""
+    type: typing.Literal[ActionType.add_txn] = pydantic.Field(ActionType.add_txn)
+    # rendered as a Jinja2 template by processor.process_transaction to obtain the target file
+    file: str
+    txn: TransactionTemplate
+# add_txn is the only action kind defined so far
+Action = ActionAddTxn
+# File matchers. processor.match_file treats a bare str as a path pattern
+# (PurePath.match), StrExactMatch as an exact path string and StrRegexMatch
+# as a regular expression applied with re.match.
+SimpleFileMatch = str | StrExactMatch | StrRegexMatch
+class InputConfigDetails(ImportBaseModel):
+    """Settings applied to every transaction extracted from a matched input file."""
+    # key looked up in beanhub_extract's ALL_EXTRACTORS by processor.process_imports
+    extractor: str | None = None
+    # postings placed before the postings of the matched action / default_txn
+    prepend_postings: list[PostingTemplate] | None = None
+    # postings placed after them (NOTE: spelled `appending_postings`, unlike `prepend_postings`)
+    appending_postings: list[PostingTemplate] | None = None
+    # fallback values for fields an action's transaction template leaves as None
+    default_txn: TransactionTemplate | None = None
+class InputConfig(ImportBaseModel):
+    """Pairs a file matcher with the settings used for the files it matches."""
+    match: SimpleFileMatch
+    config: InputConfigDetails
+class OutputConfig(ImportBaseModel):
+    """Matcher for output files (not referenced by the processing code shipped in this version)."""
+    match: SimpleFileMatch
+class ImportRule(ImportBaseModel):
+    """A transaction match rule together with the actions to run when it matches."""
+    match: TxnMatchRule
+    actions: list[Action]
+class ImportDoc(ImportBaseModel):
+    """Root of an import configuration document."""
+    # input file configurations; the first one able to process a file is used
+    inputs: list[InputConfig]
+    # rules are tried in order; only the first matching rule is applied to a transaction
+    imports: list[ImportRule]
+    outputs: list[OutputConfig] | None = None
+@dataclasses.dataclass(frozen=True)
+class ImportedTransaction:
+    """A transaction already present in a Beancount file that carries an import id."""
+    # file in which the transaction was found
+    file: pathlib.Path
+    # line number of the transaction directive the import-id metadata belongs to
+    lineno: int
+    # decoded value of the import-id metadata
+    id: str
+@dataclasses.dataclass(frozen=True)
+class ChangeSet:
+    """Changes to apply to a single Beancount file, as computed by post_processor.compute_changes."""
+    # list of imported transaction to remove
+    remove: list[ImportedTransaction]
+    # map from the line number of an existing imported transaction to the
+    # generated transaction that replaces it
+    update: dict[int, GeneratedTransaction]
+    # list of generated transaction to add
+    add: list[GeneratedTransaction]

beanhub_import-0.0.1/beanhub_import/post_processor.py ADDED Viewed

@@ -0,0 +1,174 @@
+import collections
+import copy
+import itertools
+import json
+import pathlib
+import typing
+from beancount_parser.data_types import Entry
+from beancount_parser.data_types import EntryType
+from beancount_parser.helpers import collect_entries
+from beancount_parser.parser import make_parser
+from beancount_parser.parser import traverse
+from lark import Lark
+from lark import Tree
+from . import constants
+from .data_types import ChangeSet
+from .data_types import GeneratedPosting
+from .data_types import GeneratedTransaction
+from .data_types import ImportedTransaction
+def extract_imported_transactions(
+    parser: Lark, bean_file: pathlib.Path, import_id_key: str = constants.IMPORT_ID_KEY
+) -> typing.Generator[ImportedTransaction, None, None]:
+    """Yield an ImportedTransaction for every transaction tagged with an import id.
+    Walks `bean_file` and the files reached through `traverse`, remembering
+    the most recent transaction directive and yielding once a metadata item
+    keyed `import_id_key` follows it.
+    Raises ValueError when a parsed tree does not have the expected
+    start / statement structure.
+    """
+    for bean_path, tree in traverse(parser=parser, bean_file=bean_file):
+        if tree.data != "start":
+            raise ValueError("Expected start")
+        # a transaction never spans files, so start each file without one
+        last_txn = None
+        for child in tree.children:
+            if child is None:
+                continue
+            if child.data != "statement":
+                raise ValueError("Expected statement")
+            first_child = child.children[0]
+            if not isinstance(first_child, Tree):
+                continue
+            if first_child.data == "date_directive":
+                date_directive = first_child.children[0]
+                directive_type = date_directive.data.value
+                # Any other dated directive ends the current transaction, so its
+                # metadata must not be attributed to the previous transaction.
+                last_txn = date_directive if directive_type == "txn" else None
+            elif first_child.data == "metadata_item":
+                if last_txn is None:
+                    continue
+                if first_child.children[0].value != import_id_key:
+                    # Unrelated metadata may hold values that are not JSON
+                    # strings; only decode the value of the import id itself.
+                    continue
+                yield ImportedTransaction(
+                    file=bean_path,
+                    lineno=last_txn.meta.line,
+                    id=json.loads(first_child.children[1].value),
+                )
+def compute_changes(
+    generated_txns: list[GeneratedTransaction], imported_txns: list[ImportedTransaction]
+) -> dict[pathlib.Path, ChangeSet]:
+    """Group the differences between generated and already imported transactions by file.
+    Transactions are paired by id. An imported transaction whose generated
+    counterpart targets another file is removed from its current file; a
+    generated transaction replaces the imported one when both live in the
+    same file and is added otherwise.
+    """
+    generated_by_id = {generated.id: generated for generated in generated_txns}
+    imported_by_id = {imported.id: imported for imported in imported_txns}
+    removals = collections.defaultdict(list)
+    for imported in imported_txns:
+        counterpart = generated_by_id.get(imported.id)
+        if counterpart is None:
+            continue
+        if imported.file != pathlib.Path(counterpart.file):
+            # the generated transaction moved to a different file, drop the old copy
+            removals[imported.file].append(imported)
+    additions = collections.defaultdict(list)
+    updates = collections.defaultdict(dict)
+    for generated in generated_txns:
+        existing = imported_by_id.get(generated.id)
+        target_file = pathlib.Path(generated.file)
+        if existing is None or existing.file != target_file:
+            additions[target_file].append(generated)
+        else:
+            updates[target_file][existing.lineno] = generated
+    touched_files = (
+        frozenset(removals.keys()).union(additions.keys()).union(updates.keys())
+    )
+    changes = {}
+    for file_path in touched_files:
+        changes[file_path] = ChangeSet(
+            remove=removals[file_path],
+            add=additions[file_path],
+            update=updates[file_path],
+        )
+    return changes
+def to_parser_entry(parser: Lark, text: str) -> Entry:
+    """Parse `text` (stripped of surrounding whitespace) and return its single entry.
+    Raises ValueError when the text does not contain exactly one entry.
+    """
+    parsed_tree = parser.parse(text.strip())
+    entries, _tail_comments = collect_entries(parsed_tree)
+    if len(entries) == 1:
+        return entries[0]
+    raise ValueError("Expected exactly only one entry")
+def posting_to_text(posting: GeneratedPosting) -> str:
+    """Render a posting as a two-space indented line: account, then amount and currency when present."""
+    # amount and currency are optional on GeneratedPosting; joining a None
+    # would raise TypeError, so absent parts are simply left out
+    parts = [
+        part
+        for part in (posting.account, posting.amount, posting.currency)
+        if part is not None
+    ]
+    return (" " * 2) + " ".join(parts)
+def txn_to_text(
+    txn: GeneratedTransaction, import_id_key: str = constants.IMPORT_ID_KEY
+) -> str:
+    """Render a generated transaction as Beancount text.
+    The first line holds date, flag, optional payee and narration (payee and
+    narration JSON-quoted), the second the import id metadata, followed by
+    one line per posting.
+    """
+    header_parts = [txn.date, txn.flag]
+    if txn.payee is not None:
+        header_parts.append(json.dumps(txn.payee))
+    header_parts.append(json.dumps(txn.narration))
+    output_lines = [
+        " ".join(header_parts),
+        f"  {import_id_key}: {json.dumps(txn.id)}",
+    ]
+    output_lines.extend(posting_to_text(posting) for posting in txn.postings)
+    return "\n".join(output_lines)
+def apply_change_set(
+    tree: Tree, change_set: ChangeSet, import_id_key: str = constants.IMPORT_ID_KEY
+) -> Tree:
+    """Return a copy of `tree` with the removals, updates and additions of `change_set` applied.
+    `tree` is the parsed root ("start") tree of one Beancount file and is
+    left untouched. Removed entries lose their leading comments too; updated
+    entries keep the comments of the entry they replace; added entries are
+    placed after the existing ones and before the trailing comments.
+    Raises ValueError when `tree` is not a "start" tree.
+    """
+    if tree.data != "start":
+        raise ValueError("expected start as the root rule")
+    parser = make_parser()
+    lines_to_remove = {txn.lineno for txn in change_set.remove}
+    # updates must be written with the same metadata key as additions
+    line_to_entries = {
+        lineno: to_parser_entry(parser, txn_to_text(txn, import_id_key=import_id_key))
+        for lineno, txn in change_set.update.items()
+    }
+    entries_to_add = [
+        to_parser_entry(parser, txn_to_text(txn, import_id_key=import_id_key))
+        for txn in change_set.add
+    ]
+    new_tree = copy.deepcopy(tree)
+    entries, tail_comments = collect_entries(new_tree)
+    new_children = []
+    for entry, is_existing in itertools.chain(
+        zip(entries, itertools.repeat(True)),
+        zip(entries_to_add, itertools.repeat(False)),
+    ):
+        if entry.type == EntryType.COMMENTS:
+            new_children.extend(entry.comments)
+            continue
+        actual_entry = entry
+        if is_existing:
+            # Line numbers only identify entries of this file; added entries
+            # were parsed from their own text snippet, so their line numbers
+            # must not be looked up in the remove / update maps.
+            lineno = entry.statement.meta.line
+            if lineno in lines_to_remove:
+                # We also drop the comments
+                continue
+            actual_entry = line_to_entries.get(lineno, entry)
+        # use comments from existing entry regardless
+        new_children.extend(entry.comments)
+        new_children.append(actual_entry.statement)
+        for metadata in actual_entry.metadata:
+            new_children.extend(metadata.comments)
+            new_children.append(metadata.statement)
+        for posting in actual_entry.postings:
+            new_children.extend(posting.comments)
+            new_children.append(posting.statement)
+            for metadata in posting.metadata:
+                new_children.extend(metadata.comments)
+                new_children.append(metadata.statement)
+    if tail_comments:
+        new_children.extend(tail_comments)
+    new_tree.children = new_children
+    return new_tree

beanhub_import-0.0.1/beanhub_import/processor.py ADDED Viewed

@@ -0,0 +1,197 @@
+import dataclasses
+import logging
+import os
+import pathlib
+import re
+import typing
+from beanhub_extract.data_types import Transaction
+from beanhub_extract.extractors import ALL_EXTRACTORS
+from beanhub_extract.utils import strip_txn_base_path
+from jinja2.sandbox import SandboxedEnvironment
+from .data_types import ActionType
+from .data_types import GeneratedPosting
+from .data_types import GeneratedTransaction
+from .data_types import ImportDoc
+from .data_types import ImportRule
+from .data_types import InputConfigDetails
+from .data_types import PostingTemplate
+from .data_types import SimpleFileMatch
+from .data_types import SimpleTxnMatchRule
+from .data_types import StrContainsMatch
+from .data_types import StrExactMatch
+from .data_types import StrMatch
+from .data_types import StrPrefixMatch
+from .data_types import StrRegexMatch
+from .data_types import StrSuffixMatch
+# Fallback Jinja2 templates for transaction fields that neither the matched
+# action nor the input's default_txn provides.
+DEFAULT_TXN_TEMPLATE = dict(
+    id="{{ file }}:{{ lineno }}",
+    date="{{ date }}",
+    flag="*",
+    # The render context comes from dataclasses.asdict, so `desc` is always
+    # defined and merely None when absent; the boolean flag makes `default`
+    # fall back to bank_desc for None / empty values as well.
+    narration="{{ desc | default(bank_desc, true) }}",
+)
+def walk_dir_files(
+    target_dir: pathlib.Path,
+) -> typing.Generator[pathlib.Path, None, None]:
+    """Yield the path of every file found under `target_dir`, descending into sub-directories."""
+    for dirpath, _dirnames, filenames in os.walk(target_dir):
+        parent = pathlib.Path(dirpath)
+        yield from (parent / filename for filename in filenames)
+def match_file(
+    pattern: SimpleFileMatch, filepath: pathlib.Path | pathlib.PurePath
+) -> bool:
+    """Tell whether `filepath` satisfies `pattern`.
+    A bare str is handed to PurePath.match, StrRegexMatch is applied to the
+    path string with re.match and StrExactMatch compares the path string for
+    equality. Raises ValueError for any other pattern type.
+    """
+    if isinstance(pattern, str):
+        return filepath.match(pattern)
+    path_text = str(filepath)
+    if isinstance(pattern, StrExactMatch):
+        return path_text == pattern.equals
+    if isinstance(pattern, StrRegexMatch):
+        return re.match(pattern.regex, path_text) is not None
+    raise ValueError(f"Unexpected file match type {type(pattern)}")
+def match_str(pattern: StrMatch, value: str | None) -> bool:
+    """Tell whether `value` satisfies `pattern`; a None value never matches.
+    A bare str pattern is a regular expression applied with re.match; the
+    other matcher models test equality, prefix, suffix or containment.
+    Raises ValueError for any other pattern type.
+    """
+    if value is None:
+        return False
+    if isinstance(pattern, str):
+        return re.match(pattern, value) is not None
+    if isinstance(pattern, StrExactMatch):
+        return value == pattern.equals
+    if isinstance(pattern, StrPrefixMatch):
+        return value.startswith(pattern.prefix)
+    if isinstance(pattern, StrSuffixMatch):
+        return value.endswith(pattern.suffix)
+    if isinstance(pattern, StrContainsMatch):
+        return pattern.contains in value
+    raise ValueError(f"Unexpected str match type {type(pattern)}")
+def match_transaction(txn: Transaction, rule: SimpleTxnMatchRule) -> bool:
+    """Tell whether `txn` satisfies every matcher that is set on `rule`.
+    Rule fields left as None are ignored; a rule without any matcher
+    therefore matches every transaction.
+    """
+    # Walk the declared field names instead of dumping the model with the
+    # deprecated BaseModel.dict(): only the attribute itself is needed.
+    for key in type(rule).model_fields:
+        pattern = getattr(rule, key)
+        if pattern is None:
+            continue
+        if not match_str(pattern, getattr(txn, key)):
+            return False
+    return True
+def first_non_none(*values):
+    """Return the first argument that is not None, or None when there is none."""
+    for candidate in values:
+        if candidate is not None:
+            return candidate
+    return None
+def process_transaction(
+    template_env: SandboxedEnvironment,
+    input_config: InputConfigDetails,
+    import_rules: list[ImportRule],
+    txn: Transaction,
+    default_import_id: str | None = None,
+) -> typing.Generator[GeneratedTransaction, None, None]:
+    """Yield the transactions generated for `txn` by the first matching import rule.
+    Rules are tried in order; one GeneratedTransaction is yielded per action
+    of the first rule that matches, and later rules are not considered.
+    Template strings are rendered with `template_env`, using the fields of
+    `txn` as the render context. `default_import_id` is used as the id
+    template when neither the action nor the input's default_txn sets one.
+    Raises ValueError for an action type other than add_txn.
+    """
+    # every field of the extracted transaction becomes a template variable
+    txn_ctx = dataclasses.asdict(txn)
+    default_txn = input_config.default_txn
+    def render_str(value: str | None) -> str | None:
+        # None stays None so that optional fields remain unset
+        if value is None:
+            return None
+        return template_env.from_string(value).render(**txn_ctx)
+    for import_rule in import_rules:
+        if not match_transaction(txn, import_rule.match):
+            continue
+        for action in import_rule.actions:
+            if action.type != ActionType.add_txn:
+                # we only support add txn for now
+                raise ValueError(f"Unsupported action type {action.type}")
+            # precedence per field: action template, input default_txn, built-in default
+            template_values = {
+                key: first_non_none(
+                    getattr(action.txn, key),
+                    getattr(default_txn, key) if default_txn is not None else None,
+                    DEFAULT_TXN_TEMPLATE.get(key),
+                )
+                for key in ("date", "flag", "narration", "payee")
+            }
+            # the id has one extra fallback level: the caller supplied default import id
+            template_values["id"] = first_non_none(
+                getattr(action.txn, "id"),
+                getattr(default_txn, "id") if default_txn is not None else None,
+                default_import_id,
+                DEFAULT_TXN_TEMPLATE["id"],
+            )
+            # posting order: prepend_postings, then the action's postings (or the
+            # default_txn ones when the action has none), then appending_postings
+            posting_templates: list[PostingTemplate] = []
+            if input_config.prepend_postings is not None:
+                posting_templates.extend(input_config.prepend_postings)
+            if action.txn.postings is not None:
+                posting_templates.extend(action.txn.postings)
+            elif default_txn is not None and default_txn.postings is not None:
+                posting_templates.extend(default_txn.postings)
+            if input_config.appending_postings is not None:
+                posting_templates.extend(input_config.appending_postings)
+            generated_postings = []
+            for posting_template in posting_templates:
+                generated_postings.append(
+                    GeneratedPosting(
+                        account=render_str(posting_template.account),
+                        amount=render_str(posting_template.amount),
+                        currency=render_str(posting_template.currency),
+                    )
+                )
+            yield GeneratedTransaction(
+                file=render_str(action.file),
+                postings=generated_postings,
+                **{key: render_str(value) for key, value in template_values.items()},
+            )
+        # only the first matching rule is applied
+        break
+def process_imports(
+    import_doc: ImportDoc,
+    input_dir: pathlib.Path,
+) -> typing.Generator[GeneratedTransaction, None, None]:
+    """Yield the transactions generated from every input file found under `input_dir`.
+    Each file is handled by the first input config of `import_doc` that
+    matches it and names a known extractor; input configs without an
+    extractor, or with an unknown one, are skipped with a warning. Every
+    extracted transaction is run through the import rules of `import_doc`.
+    """
+    logger = logging.getLogger(__name__)
+    template_env = SandboxedEnvironment()
+    for filepath in walk_dir_files(input_dir):
+        for input_config in import_doc.inputs:
+            if not match_file(input_config.match, filepath):
+                continue
+            rel_filepath = filepath.relative_to(input_dir)
+            extractor_name = input_config.config.extractor
+            if extractor_name is None:
+                # TODO: identify input file automatically
+                # Without a name there is no extractor class to use; skip
+                # instead of failing on an unbound (or stale) extractor_cls.
+                logger.warning(
+                    "No extractor configured for file %s, skip", rel_filepath
+                )
+                continue
+            extractor_cls = ALL_EXTRACTORS.get(extractor_name)
+            if extractor_cls is None:
+                logger.warning(
+                    "Extractor %s not found for file %s, skip",
+                    extractor_name,
+                    rel_filepath,
+                )
+                continue
+            logger.info(
+                "Processing file %s with extractor %s", rel_filepath, extractor_name
+            )
+            with filepath.open("rt") as fo:
+                extractor = extractor_cls(fo)
+                for transaction in extractor():
+                    txn = strip_txn_base_path(input_dir, transaction)
+                    for generated_txn in process_transaction(
+                        template_env=template_env,
+                        input_config=input_config.config,
+                        import_rules=import_doc.imports,
+                        default_import_id=getattr(extractor, "DEFAULT_IMPORT_ID", None),
+                        txn=txn,
+                    ):
+                        yield generated_txn
+            # a file is processed by at most one input config
+            break

beanhub_import-0.0.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,27 @@
+[tool.poetry]
+name = "beanhub-import"
+version = "0.0.1"
+description = "The simple library for import extracted transactions provided by beanhub-extract and generate corresponding Beancount transactions based on predefined rules"
+authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
+license = "MIT"
+readme = "README.md"
+[tool.poetry.dependencies]
+python = "^3.9"
+pytz = "^2024.1"
+beanhub-extract = "^0.0.5"
+pydantic = "^2.7.1"
+pyyaml = "^6.0.1"
+jinja2 = "^3.1.3"
+beancount-black = "^1.0.2"
+beancount-parser = "^1.1.0"
+[tool.poetry.dev-dependencies]
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.1"
+pytest-mock = "^3.11.1"
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"