monopoly-core 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monopoly/__init__.py +3 -0
- monopoly/banks/__init__.py +99 -0
- monopoly/banks/base.py +54 -0
- monopoly/banks/citibank/__init__.py +3 -0
- monopoly/banks/citibank/citibank.py +36 -0
- monopoly/banks/dbs/__init__.py +3 -0
- monopoly/banks/dbs/dbs.py +40 -0
- monopoly/banks/hsbc/__init__.py +3 -0
- monopoly/banks/hsbc/hsbc.py +40 -0
- monopoly/banks/ocbc/__init__.py +3 -0
- monopoly/banks/ocbc/ocbc.py +43 -0
- monopoly/banks/standard_chartered/__init__.py +3 -0
- monopoly/banks/standard_chartered/standard_chartered.py +36 -0
- monopoly/cli.py +339 -0
- monopoly/config.py +117 -0
- monopoly/constants.py +151 -0
- monopoly/examples/__init__.py +3 -0
- monopoly/examples/example_bank.py +27 -0
- monopoly/examples/example_statement.pdf +0 -0
- monopoly/examples/single_statement.py +31 -0
- monopoly/generic/__init__.py +11 -0
- monopoly/generic/generic.py +387 -0
- monopoly/generic/generic_handler.py +84 -0
- monopoly/generic/patterns.py +47 -0
- monopoly/handler.py +41 -0
- monopoly/log.py +18 -0
- monopoly/metadata.py +99 -0
- monopoly/pdf.py +182 -0
- monopoly/pipeline.py +137 -0
- monopoly/statements/__init__.py +5 -0
- monopoly/statements/base.py +205 -0
- monopoly/statements/credit_statement.py +71 -0
- monopoly/statements/debit_statement.py +114 -0
- monopoly/statements/transaction.py +138 -0
- monopoly/write.py +56 -0
- monopoly_core-0.9.0.dist-info/LICENSE.md +661 -0
- monopoly_core-0.9.0.dist-info/METADATA +93 -0
- monopoly_core-0.9.0.dist-info/RECORD +44 -0
- monopoly_core-0.9.0.dist-info/WHEEL +4 -0
- monopoly_core-0.9.0.dist-info/entry_points.txt +3 -0
- test_utils/__init__.py +3 -0
- test_utils/banks.py +21 -0
- test_utils/skip.py +27 -0
- test_utils/transactions.py +20 -0
monopoly/__init__.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import Field, fields
|
|
3
|
+
from itertools import product
|
|
4
|
+
from typing import Type
|
|
5
|
+
|
|
6
|
+
from monopoly.constants import EncryptionIdentifier, MetadataIdentifier
|
|
7
|
+
|
|
8
|
+
from ..examples.example_bank import ExampleBank
|
|
9
|
+
from .base import BankBase
|
|
10
|
+
from .citibank import Citibank
|
|
11
|
+
from .dbs import Dbs
|
|
12
|
+
from .hsbc import Hsbc
|
|
13
|
+
from .ocbc import Ocbc
|
|
14
|
+
from .standard_chartered import StandardChartered
|
|
15
|
+
|
|
16
|
+
banks: list[Type[BankBase]] = [
|
|
17
|
+
Citibank,
|
|
18
|
+
Dbs,
|
|
19
|
+
ExampleBank,
|
|
20
|
+
Hsbc,
|
|
21
|
+
Ocbc,
|
|
22
|
+
StandardChartered,
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class UnsupportedBankError(Exception):
    """
    Error signalling that no processor could be found for a specific bank.
    """
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def detect_bank(
    metadata_items: list[EncryptionIdentifier | MetadataIdentifier],
) -> Type[BankBase] | None:
    """
    Returns the first bank in `banks` whose identifiers match the supplied
    encryption/metadata identifiers, or None when no bank matches.

    Banks are tried in the order in which they are listed in `banks`.
    """
    matching_banks = (
        candidate
        for candidate in banks
        if is_bank_identified(metadata_items, candidate)
    )
    # the generator is lazy, so no further banks are checked after the first hit
    return next(matching_banks, None)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def is_bank_identified(
    metadata_items: list[EncryptionIdentifier | MetadataIdentifier],
    bank: Type[BankBase],
) -> bool:
    """
    Reports whether any identifier declared on `bank` matches any of the
    supplied metadata items.

    Every (identifier, metadata item) pairing is tried. A pairing counts as a
    match only if every dataclass field of the metadata item passes
    `check_matching_field`.
    """
    pairings = product(bank.identifiers, metadata_items)  # type: ignore

    for expected, actual in pairings:
        logger.debug(
            "Comparing bank %s identifier %s against PDF metadata %s",
            bank.__name__,
            expected,
            actual,
        )

        every_field_matches = all(
            check_matching_field(item_field, actual, expected)
            for item_field in fields(actual)
        )
        if not every_field_matches:
            continue

        logger.debug("Match found for bank %s", bank.__name__)
        return True

    return False
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def check_matching_field(
    field: Field,
    metadata: EncryptionIdentifier | MetadataIdentifier,
    identifier: EncryptionIdentifier | MetadataIdentifier,
) -> bool:
    """
    Checks if a field in the metadata matches the corresponding identifier field.

    String values match when the identifier value is a substring of the
    metadata value; all other values must compare equal.

    Args:
        field: dataclass field whose name is looked up on both objects.
        metadata: item that the identifier is compared against.
        identifier: identifier whose field value is searched for.

    Returns:
        True if the field matches, False otherwise (including when the two
        objects are not of the same type).
    """
    # Only compare matching identifier types
    if type(metadata) is not type(identifier):
        return False

    field_value = getattr(metadata, field.name)
    identifier_value = getattr(identifier, field.name)

    # Allow for partial string matching. The decision is based on the runtime
    # values instead of instantiating `field.type`: calling the annotation
    # raises TypeError when it is a string (postponed/quoted annotations) or a
    # non-callable typing construct, and `in` would fail on a None value.
    partial_string_match = (
        isinstance(field_value, str)
        and isinstance(identifier_value, str)
        and identifier_value in field_value
    )

    # other types should match exactly
    full_match = identifier_value == field_value

    if partial_string_match or full_match:
        logger.debug("Match: %s - %s", identifier_value, field_value)
        return True

    return False
|
monopoly/banks/base.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import SecretStr
|
|
6
|
+
|
|
7
|
+
from monopoly.config import CreditStatementConfig, DebitStatementConfig, PdfConfig
|
|
8
|
+
from monopoly.constants import EncryptionIdentifier, MetadataIdentifier
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BankBase(ABC):
    """
    Abstract class to handle initialization of common variables
    that are shared between bank processor classes.

    Ensures consistency between bank classes.

    Subclasses must define `identifiers` and, unless instantiated with
    `generic=True`, at least one of `credit_config` / `debit_config`.
    They may also override `pdf_config` and `passwords`.
    """

    credit_config: Optional[CreditStatementConfig] = None
    debit_config: Optional[DebitStatementConfig] = None
    # pdf_config defaults to an empty object if not overridden
    pdf_config: PdfConfig = PdfConfig()

    def __init__(self, generic=False):
        """
        Validates the class-level configuration and ensures a PDF config exists.

        Args:
            generic: when True, the check that a credit or debit config
                is defined is skipped.
        """
        self.validate_config(generic)
        self.populate_pdf_config()

    def validate_config(self, generic: bool) -> None:
        """
        Basic validation to ensure required attributes are set.

        Raises:
            NotImplementedError: if `generic` is False and neither
                `credit_config` nor `debit_config` is defined.
        """
        if generic:
            return

        if self.credit_config is None and self.debit_config is None:
            raise NotImplementedError(
                f"{self.__class__.__name__} "
                "must implement either `credit_config` or `debit_config`"
            )

    def populate_pdf_config(self) -> None:
        """Ensure that PDF config always exists, falling back to the default."""
        if not self.pdf_config:
            self.pdf_config = PdfConfig()
            # lazy %-style arguments: the message is only built if it is emitted
            logger.info("%s: Using default `pdf_config`", self.__class__.__name__)

    @property
    @abstractmethod
    def identifiers(self) -> list[EncryptionIdentifier | MetadataIdentifier]:
        """Identifiers of the bank; must be provided by every subclass."""
        raise NotImplementedError("Identifiers must be defined")

    @property
    def passwords(self) -> Optional[list[SecretStr]]:
        """
        Passwords for the bank's PDFs; None unless a subclass overrides it.

        This getter only runs when a subclass has not defined `passwords`, so
        there is nothing to return here. Reading `self.passwords` from inside
        the getter would call the getter again and recurse until
        RecursionError.
        """
        return None
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from monopoly.config import CreditStatementConfig, PdfConfig, passwords
|
|
4
|
+
from monopoly.constants import (
|
|
5
|
+
BankNames,
|
|
6
|
+
CreditTransactionPatterns,
|
|
7
|
+
MetadataIdentifier,
|
|
8
|
+
StatementBalancePatterns,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from ..base import BankBase
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Citibank(BankBase):
    """
    Bank definition for Citibank: credit statement config, PDF config,
    metadata identifiers and PDF passwords.
    """

    identifiers = [
        MetadataIdentifier(
            producer="Ricoh Americas Corporation, AFP2PDF",
            creator="Ricoh Americas Corporation, AFP2PDF",
        ),
    ]

    # The right-hand side is evaluated before the class attribute exists, so
    # `passwords` here still refers to the object imported from monopoly.config.
    passwords = passwords.citibank_pdf_passwords

    pdf_config = PdfConfig(
        page_range=(0, -3),
        page_bbox=(20, 0, 595, 840),
    )

    credit_config = CreditStatementConfig(
        bank_name=BankNames.CITIBANK,
        transaction_pattern=CreditTransactionPatterns.CITIBANK,
        prev_balance_pattern=StatementBalancePatterns.CITIBANK,
        statement_date_pattern=r"Statement\sDate\s+(.*)",
    )
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from monopoly.config import CreditStatementConfig, DebitStatementConfig
|
|
4
|
+
from monopoly.constants import (
|
|
5
|
+
BankNames,
|
|
6
|
+
CreditTransactionPatterns,
|
|
7
|
+
DebitTransactionPatterns,
|
|
8
|
+
EncryptionIdentifier,
|
|
9
|
+
MetadataIdentifier,
|
|
10
|
+
StatementBalancePatterns,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from ..base import BankBase
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Dbs(BankBase):
    """
    Bank definition for DBS: credit and debit statement configs plus the
    encryption and metadata identifiers.
    """

    identifiers = [
        EncryptionIdentifier(
            pdf_version=1.4,
            algorithm=2,
            revision=3,
            length=128,
            permissions=-1852,
        ),
        MetadataIdentifier(creator="Quadient CXM AG"),
    ]

    credit_config = CreditStatementConfig(
        bank_name=BankNames.DBS,
        transaction_pattern=CreditTransactionPatterns.DBS,
        prev_balance_pattern=StatementBalancePatterns.DBS,
        statement_date_pattern=r"(\d{2}\s[A-Za-z]{3}\s\d{4})",
        multiline_transactions=False,
    )

    debit_config = DebitStatementConfig(
        bank_name=BankNames.DBS,
        transaction_pattern=DebitTransactionPatterns.DBS,
        debit_statement_identifier=r"(WITHDRAWAL.*DEPOSIT.*BALANCE)",
        statement_date_pattern=r"(\d{2}\s[A-Za-z]{3}\s\d{4})",
        multiline_transactions=True,
    )
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from monopoly.config import CreditStatementConfig, PdfConfig, passwords
|
|
4
|
+
from monopoly.constants import (
|
|
5
|
+
BankNames,
|
|
6
|
+
CreditTransactionPatterns,
|
|
7
|
+
EncryptionIdentifier,
|
|
8
|
+
MetadataIdentifier,
|
|
9
|
+
StatementBalancePatterns,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from ..base import BankBase
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Hsbc(BankBase):
    """
    Bank definition for HSBC: credit statement config, PDF config,
    encryption/metadata identifiers and PDF passwords.
    """

    identifiers = [
        EncryptionIdentifier(
            pdf_version=1.6,
            algorithm=4,
            revision=4,
            length=128,
            permissions=-1804,
        ),
        MetadataIdentifier(
            creator="OpenText Exstream",
            title="PRJ_BEAGLE_ST_CNS_SGH_APP_Orchid",
        ),
    ]

    # The right-hand side is evaluated before the class attribute exists, so
    # `passwords` here still refers to the object imported from monopoly.config.
    passwords = passwords.hsbc_pdf_passwords

    pdf_config = PdfConfig(page_bbox=(0, 0, 379, 842))

    credit_config = CreditStatementConfig(
        bank_name=BankNames.HSBC,
        transaction_pattern=CreditTransactionPatterns.HSBC,
        prev_balance_pattern=StatementBalancePatterns.HSBC,
        statement_date_pattern=r"Statement From .* to (\d{2}\s[A-Z]{3}\s\d{4})",
        multiline_transactions=True,
    )
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from monopoly.config import CreditStatementConfig, DebitStatementConfig, passwords
|
|
4
|
+
from monopoly.constants import (
|
|
5
|
+
BankNames,
|
|
6
|
+
CreditTransactionPatterns,
|
|
7
|
+
DebitTransactionPatterns,
|
|
8
|
+
EncryptionIdentifier,
|
|
9
|
+
MetadataIdentifier,
|
|
10
|
+
StatementBalancePatterns,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from ..base import BankBase
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Ocbc(BankBase):
    """
    Bank definition for OCBC: credit and debit statement configs,
    encryption/metadata identifiers and PDF passwords.
    """

    identifiers = [
        EncryptionIdentifier(
            pdf_version=1.4,
            algorithm=4,
            revision=4,
            length=128,
            permissions=-1036,
        ),
        MetadataIdentifier(
            producer="Streamline PDFGen for OCBC Group",
            creator="pdfgen",
        ),
    ]

    # The right-hand side is evaluated before the class attribute exists, so
    # `passwords` here still refers to the object imported from monopoly.config.
    passwords = passwords.ocbc_pdf_passwords

    credit_config = CreditStatementConfig(
        bank_name=BankNames.OCBC,
        transaction_pattern=CreditTransactionPatterns.OCBC,
        prev_balance_pattern=StatementBalancePatterns.OCBC,
        statement_date_pattern=r"(\d{2}\-\d{2}\-\d{4})",
    )

    debit_config = DebitStatementConfig(
        bank_name=BankNames.OCBC,
        transaction_pattern=DebitTransactionPatterns.OCBC,
        debit_statement_identifier=r"(Withdrawal.*Deposit.*Balance)",
        statement_date_pattern=r"TO\s(\d+\s[A-Za-z]{3}\s\d{4})",
        multiline_transactions=True,
    )
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from monopoly.config import CreditStatementConfig, PdfConfig, passwords
|
|
4
|
+
from monopoly.constants import (
|
|
5
|
+
BankNames,
|
|
6
|
+
CreditTransactionPatterns,
|
|
7
|
+
MetadataIdentifier,
|
|
8
|
+
StatementBalancePatterns,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from ..base import BankBase
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class StandardChartered(BankBase):
    """
    Bank definition for Standard Chartered: credit statement config,
    PDF config, metadata identifiers and PDF passwords.
    """

    identifiers = [
        MetadataIdentifier(
            producer="iText",
            title="eStatement",
        ),
    ]

    # The right-hand side is evaluated before the class attribute exists, so
    # `passwords` here still refers to the object imported from monopoly.config.
    passwords = passwords.standard_chartered_pdf_passwords

    pdf_config = PdfConfig(
        page_bbox=(0, 0, 580, 820),
        page_range=(0, -1),
    )

    credit_config = CreditStatementConfig(
        bank_name=BankNames.STANDARD_CHARTERED,
        transaction_pattern=CreditTransactionPatterns.STANDARD_CHARTERED,
        prev_balance_pattern=StatementBalancePatterns.STANDARD_CHARTERED,
        statement_date_pattern=r"(\d{2}\s\w+\s\d{4})",
    )
|