monopoly-core 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. monopoly/__init__.py +3 -0
  2. monopoly/banks/__init__.py +99 -0
  3. monopoly/banks/base.py +54 -0
  4. monopoly/banks/citibank/__init__.py +3 -0
  5. monopoly/banks/citibank/citibank.py +36 -0
  6. monopoly/banks/dbs/__init__.py +3 -0
  7. monopoly/banks/dbs/dbs.py +40 -0
  8. monopoly/banks/hsbc/__init__.py +3 -0
  9. monopoly/banks/hsbc/hsbc.py +40 -0
  10. monopoly/banks/ocbc/__init__.py +3 -0
  11. monopoly/banks/ocbc/ocbc.py +43 -0
  12. monopoly/banks/standard_chartered/__init__.py +3 -0
  13. monopoly/banks/standard_chartered/standard_chartered.py +36 -0
  14. monopoly/cli.py +339 -0
  15. monopoly/config.py +117 -0
  16. monopoly/constants.py +151 -0
  17. monopoly/examples/__init__.py +3 -0
  18. monopoly/examples/example_bank.py +27 -0
  19. monopoly/examples/example_statement.pdf +0 -0
  20. monopoly/examples/single_statement.py +31 -0
  21. monopoly/generic/__init__.py +11 -0
  22. monopoly/generic/generic.py +387 -0
  23. monopoly/generic/generic_handler.py +84 -0
  24. monopoly/generic/patterns.py +47 -0
  25. monopoly/handler.py +41 -0
  26. monopoly/log.py +18 -0
  27. monopoly/metadata.py +99 -0
  28. monopoly/pdf.py +182 -0
  29. monopoly/pipeline.py +137 -0
  30. monopoly/statements/__init__.py +5 -0
  31. monopoly/statements/base.py +205 -0
  32. monopoly/statements/credit_statement.py +71 -0
  33. monopoly/statements/debit_statement.py +114 -0
  34. monopoly/statements/transaction.py +138 -0
  35. monopoly/write.py +56 -0
  36. monopoly_core-0.9.0.dist-info/LICENSE.md +661 -0
  37. monopoly_core-0.9.0.dist-info/METADATA +93 -0
  38. monopoly_core-0.9.0.dist-info/RECORD +44 -0
  39. monopoly_core-0.9.0.dist-info/WHEEL +4 -0
  40. monopoly_core-0.9.0.dist-info/entry_points.txt +3 -0
  41. test_utils/__init__.py +3 -0
  42. test_utils/banks.py +21 -0
  43. test_utils/skip.py +27 -0
  44. test_utils/transactions.py +20 -0
monopoly/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from monopoly.log import get_logger
2
+
3
+ logger = get_logger()
@@ -0,0 +1,99 @@
1
+ import logging
2
+ from dataclasses import Field, fields
3
+ from itertools import product
4
+ from typing import Type
5
+
6
+ from monopoly.constants import EncryptionIdentifier, MetadataIdentifier
7
+
8
+ from ..examples.example_bank import ExampleBank
9
+ from .base import BankBase
10
+ from .citibank import Citibank
11
+ from .dbs import Dbs
12
+ from .hsbc import Hsbc
13
+ from .ocbc import Ocbc
14
+ from .standard_chartered import StandardChartered
15
+
16
# Bank classes consulted by `detect_bank`, in this order.
banks: list[Type[BankBase]] = [Citibank, Dbs, ExampleBank, Hsbc, Ocbc, StandardChartered]

# Module-scoped logger used by the detection helpers in this module.
logger = logging.getLogger(__name__)
26
+
27
+
28
class UnsupportedBankError(Exception):
    """Signals that no processor could be found for a specific bank."""
30
+
31
+
32
def detect_bank(
    metadata_items: list[EncryptionIdentifier | MetadataIdentifier],
) -> Type[BankBase] | None:
    """
    Finds the bank whose identifiers match the metadata read from a PDF.

    The registered banks are tried in the order they appear in `banks`;
    the first one accepted by `is_bank_identified` is returned.
    Returns None when no registered bank matches.
    """
    # Lazy generator: banks after the first match are never evaluated
    matches = (bank for bank in banks if is_bank_identified(metadata_items, bank))
    return next(matches, None)
43
+
44
+
45
def is_bank_identified(
    metadata_items: list[EncryptionIdentifier | MetadataIdentifier],
    bank: Type[BankBase],
) -> bool:
    """
    Reports whether any identifier declared by `bank` matches any metadata item.

    Every (identifier, metadata) pairing is tried. A pairing counts as a match
    only when all dataclass fields of the metadata item pass
    `check_matching_field`.
    """
    bank_name = bank.__name__
    pairings = product(bank.identifiers, metadata_items)  # type: ignore

    for identifier, metadata in pairings:
        logger.debug(
            "Comparing bank %s identifier %s against PDF metadata %s",
            bank_name,
            identifier,
            metadata,
        )

        every_field_matches = all(
            check_matching_field(field, metadata, identifier)
            for field in fields(metadata)
        )
        if not every_field_matches:
            continue

        logger.debug("Match found for bank %s", bank_name)
        return True

    return False
70
+
71
+
72
def check_matching_field(
    field: Field,
    metadata: EncryptionIdentifier | MetadataIdentifier,
    identifier: EncryptionIdentifier | MetadataIdentifier,
) -> bool:
    """
    Checks if a field in the metadata matches the corresponding identifier field.

    String values match when the identifier's value is a substring of the
    metadata's value (so an empty identifier string matches any string).
    All other values must compare equal.

    Args:
        field: dataclass field whose value is read from both objects.
        metadata: identifier object built from the PDF being inspected.
        identifier: identifier object declared by a bank.

    Returns:
        True if the field matches, False otherwise (including when the two
        objects are of different identifier types).
    """
    # Only compare matching identifier types
    if type(metadata) is not type(identifier):
        return False

    field_value = getattr(metadata, field.name)
    identifier_value = getattr(identifier, field.name)

    # Decide on partial matching from the runtime values rather than by
    # instantiating `field.type`: the annotation may be a string (postponed
    # annotations) or a typing construct such as Optional[str], neither of
    # which can be called, and a None metadata value does not support `in`.
    if isinstance(identifier_value, str) and isinstance(field_value, str):
        # allow for partial string matching (this also covers exact equality)
        matched = identifier_value in field_value
    else:
        # other types should match exactly
        matched = bool(identifier_value == field_value)

    if matched:
        logger.debug("Match: %s - %s", identifier_value, field_value)

    return matched
monopoly/banks/base.py ADDED
@@ -0,0 +1,54 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Optional
4
+
5
+ from pydantic import SecretStr
6
+
7
+ from monopoly.config import CreditStatementConfig, DebitStatementConfig, PdfConfig
8
+ from monopoly.constants import EncryptionIdentifier, MetadataIdentifier
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class BankBase(ABC):
    """
    Abstract class to handle initialization of common variables
    that are shared between bank processor classes.

    Ensures consistency between bank classes.

    Subclasses must define `identifiers` and, unless instantiated with
    `generic=True`, at least one of `credit_config` / `debit_config`.
    `pdf_config` and `passwords` may be overridden as class attributes.
    """

    credit_config: Optional[CreditStatementConfig] = None
    debit_config: Optional[DebitStatementConfig] = None
    # pdf_config defaults to an empty object if not overridden
    pdf_config: PdfConfig = PdfConfig()

    def __init__(self, generic=False):
        self.validate_config(generic)
        self.populate_pdf_config()

    def validate_config(self, generic: bool):
        """
        Basic validation to ensure required attributes are set.

        Raises:
            NotImplementedError: if the bank is not generic and defines
                neither `credit_config` nor `debit_config`.
        """
        if generic:
            return

        if self.credit_config is None and self.debit_config is None:
            raise NotImplementedError(
                f"{self.__class__.__name__} "
                "must implement either `credit_config` or `debit_config`"
            )

    def populate_pdf_config(self):
        """Ensure that a PDF config always exists, falling back to the default."""
        if not self.pdf_config:
            self.pdf_config = PdfConfig()
            logger.info("%s: Using default `pdf_config`", self.__class__.__name__)

    @property
    @abstractmethod
    def identifiers(self) -> list[EncryptionIdentifier | MetadataIdentifier]:
        raise NotImplementedError("Identifiers must be defined")

    @property
    def passwords(self) -> Optional[list[SecretStr]]:
        """
        Passwords used to open the bank's PDFs; None when the bank defines none.

        Banks that need passwords shadow this property with a class attribute.
        """
        # Reading `self.passwords` here would re-enter this getter and recurse
        # until RecursionError for banks that do not override the attribute.
        return None
@@ -0,0 +1,3 @@
1
+ from .citibank import Citibank
2
+
3
+ __all__ = ["Citibank"]
@@ -0,0 +1,36 @@
1
+ import logging
2
+
3
+ from monopoly.config import CreditStatementConfig, PdfConfig, passwords
4
+ from monopoly.constants import (
5
+ BankNames,
6
+ CreditTransactionPatterns,
7
+ MetadataIdentifier,
8
+ StatementBalancePatterns,
9
+ )
10
+
11
+ from ..base import BankBase
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class Citibank(BankBase):
    """Bank definition for Citibank; only a credit statement config is declared."""

    # Metadata that a PDF must carry to be recognised as this bank
    identifiers = [
        MetadataIdentifier(
            producer="Ricoh Americas Corporation, AFP2PDF",
            creator="Ricoh Americas Corporation, AFP2PDF",
        )
    ]

    pdf_config = PdfConfig(page_range=(0, -3), page_bbox=(20, 0, 595, 840))

    credit_config = CreditStatementConfig(
        bank_name=BankNames.CITIBANK,
        transaction_pattern=CreditTransactionPatterns.CITIBANK,
        prev_balance_pattern=StatementBalancePatterns.CITIBANK,
        statement_date_pattern=r"Statement\sDate\s+(.*)",
    )

    # Right-hand side is the `passwords` object imported from monopoly.config
    passwords = passwords.citibank_pdf_passwords
@@ -0,0 +1,3 @@
1
+ from .dbs import Dbs
2
+
3
+ __all__ = ["Dbs"]
@@ -0,0 +1,40 @@
1
+ import logging
2
+
3
+ from monopoly.config import CreditStatementConfig, DebitStatementConfig
4
+ from monopoly.constants import (
5
+ BankNames,
6
+ CreditTransactionPatterns,
7
+ DebitTransactionPatterns,
8
+ EncryptionIdentifier,
9
+ MetadataIdentifier,
10
+ StatementBalancePatterns,
11
+ )
12
+
13
+ from ..base import BankBase
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class Dbs(BankBase):
    """Bank definition for DBS, declaring both credit and debit statement configs."""

    # Either identifier may match: one is compared against encryption
    # metadata, the other against document metadata
    identifiers = [
        EncryptionIdentifier(
            pdf_version=1.4,
            algorithm=2,
            revision=3,
            length=128,
            permissions=-1852,
        ),
        MetadataIdentifier(creator="Quadient CXM AG"),
    ]

    credit_config = CreditStatementConfig(
        bank_name=BankNames.DBS,
        transaction_pattern=CreditTransactionPatterns.DBS,
        prev_balance_pattern=StatementBalancePatterns.DBS,
        statement_date_pattern=r"(\d{2}\s[A-Za-z]{3}\s\d{4})",
        multiline_transactions=False,
    )

    debit_config = DebitStatementConfig(
        bank_name=BankNames.DBS,
        transaction_pattern=DebitTransactionPatterns.DBS,
        statement_date_pattern=r"(\d{2}\s[A-Za-z]{3}\s\d{4})",
        debit_statement_identifier=r"(WITHDRAWAL.*DEPOSIT.*BALANCE)",
        multiline_transactions=True,
    )
@@ -0,0 +1,3 @@
1
+ from .hsbc import Hsbc
2
+
3
+ __all__ = ["Hsbc"]
@@ -0,0 +1,40 @@
1
+ import logging
2
+
3
+ from monopoly.config import CreditStatementConfig, PdfConfig, passwords
4
+ from monopoly.constants import (
5
+ BankNames,
6
+ CreditTransactionPatterns,
7
+ EncryptionIdentifier,
8
+ MetadataIdentifier,
9
+ StatementBalancePatterns,
10
+ )
11
+
12
+ from ..base import BankBase
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class Hsbc(BankBase):
    """Bank definition for HSBC; only a credit statement config is declared."""

    # Either identifier may match: one is compared against encryption
    # metadata, the other against document metadata
    identifiers = [
        EncryptionIdentifier(
            pdf_version=1.6,
            algorithm=4,
            revision=4,
            length=128,
            permissions=-1804,
        ),
        MetadataIdentifier(
            creator="OpenText Exstream",
            title="PRJ_BEAGLE_ST_CNS_SGH_APP_Orchid",
        ),
    ]

    pdf_config = PdfConfig(page_bbox=(0, 0, 379, 842))

    credit_config = CreditStatementConfig(
        bank_name=BankNames.HSBC,
        transaction_pattern=CreditTransactionPatterns.HSBC,
        prev_balance_pattern=StatementBalancePatterns.HSBC,
        statement_date_pattern=r"Statement From .* to (\d{2}\s[A-Z]{3}\s\d{4})",
        multiline_transactions=True,
    )

    # Right-hand side is the `passwords` object imported from monopoly.config
    passwords = passwords.hsbc_pdf_passwords
@@ -0,0 +1,3 @@
1
+ from .ocbc import Ocbc
2
+
3
+ __all__ = ["Ocbc"]
@@ -0,0 +1,43 @@
1
+ import logging
2
+
3
+ from monopoly.config import CreditStatementConfig, DebitStatementConfig, passwords
4
+ from monopoly.constants import (
5
+ BankNames,
6
+ CreditTransactionPatterns,
7
+ DebitTransactionPatterns,
8
+ EncryptionIdentifier,
9
+ MetadataIdentifier,
10
+ StatementBalancePatterns,
11
+ )
12
+
13
+ from ..base import BankBase
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class Ocbc(BankBase):
    """Bank definition for OCBC, declaring both credit and debit statement configs."""

    # Either identifier may match: one is compared against encryption
    # metadata, the other against document metadata
    identifiers = [
        EncryptionIdentifier(
            pdf_version=1.4,
            algorithm=4,
            revision=4,
            length=128,
            permissions=-1036,
        ),
        MetadataIdentifier(
            producer="Streamline PDFGen for OCBC Group",
            creator="pdfgen",
        ),
    ]

    credit_config = CreditStatementConfig(
        bank_name=BankNames.OCBC,
        transaction_pattern=CreditTransactionPatterns.OCBC,
        prev_balance_pattern=StatementBalancePatterns.OCBC,
        statement_date_pattern=r"(\d{2}\-\d{2}\-\d{4})",
    )

    debit_config = DebitStatementConfig(
        bank_name=BankNames.OCBC,
        transaction_pattern=DebitTransactionPatterns.OCBC,
        statement_date_pattern=r"TO\s(\d+\s[A-Za-z]{3}\s\d{4})",
        debit_statement_identifier=r"(Withdrawal.*Deposit.*Balance)",
        multiline_transactions=True,
    )

    # Right-hand side is the `passwords` object imported from monopoly.config
    passwords = passwords.ocbc_pdf_passwords
@@ -0,0 +1,3 @@
1
+ from .standard_chartered import StandardChartered
2
+
3
+ __all__ = ["StandardChartered"]
@@ -0,0 +1,36 @@
1
+ import logging
2
+
3
+ from monopoly.config import CreditStatementConfig, PdfConfig, passwords
4
+ from monopoly.constants import (
5
+ BankNames,
6
+ CreditTransactionPatterns,
7
+ MetadataIdentifier,
8
+ StatementBalancePatterns,
9
+ )
10
+
11
+ from ..base import BankBase
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class StandardChartered(BankBase):
    """Bank definition for Standard Chartered; only a credit statement config is declared."""

    # Metadata that a PDF must carry to be recognised as this bank
    identifiers = [MetadataIdentifier(producer="iText", title="eStatement")]

    pdf_config = PdfConfig(page_bbox=(0, 0, 580, 820), page_range=(0, -1))

    credit_config = CreditStatementConfig(
        bank_name=BankNames.STANDARD_CHARTERED,
        transaction_pattern=CreditTransactionPatterns.STANDARD_CHARTERED,
        prev_balance_pattern=StatementBalancePatterns.STANDARD_CHARTERED,
        statement_date_pattern=r"(\d{2}\s\w+\s\d{4})",
    )

    # Right-hand side is the `passwords` object imported from monopoly.config
    passwords = passwords.standard_chartered_pdf_passwords