counterparty 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- counterparty/__init__.py +29 -0
- counterparty/extraction/__init__.py +0 -0
- counterparty/extraction/clean.py +124 -0
- counterparty/extraction/extract_payer_payee.py +160 -0
- counterparty/extraction/infer_counterparty.py +17 -0
- counterparty/key_engine/__init__.py +0 -0
- counterparty/key_engine/canonical_keys.json +326 -0
- counterparty/key_engine/key_detector.py +335 -0
- counterparty/key_engine/keys.py +332 -0
- counterparty/parsers/LAT_AM/LAT_AM_Entry.py +91 -0
- counterparty/parsers/LAT_AM/__init__.py +0 -0
- counterparty/parsers/LAT_AM/pattern1.py +169 -0
- counterparty/parsers/LAT_AM/pattern10.py +76 -0
- counterparty/parsers/LAT_AM/pattern11.py +76 -0
- counterparty/parsers/LAT_AM/pattern12.py +99 -0
- counterparty/parsers/LAT_AM/pattern2.py +102 -0
- counterparty/parsers/LAT_AM/pattern3.py +75 -0
- counterparty/parsers/LAT_AM/pattern4.py +128 -0
- counterparty/parsers/LAT_AM/pattern5.py +54 -0
- counterparty/parsers/LAT_AM/pattern6.py +141 -0
- counterparty/parsers/LAT_AM/pattern7.py +116 -0
- counterparty/parsers/LAT_AM/pattern8.py +134 -0
- counterparty/parsers/LAT_AM/pattern9.py +86 -0
- counterparty/parsers/__init__.py +0 -0
- counterparty/parsers/ach/__init__.py +0 -0
- counterparty/parsers/ach/ach_parser.py +190 -0
- counterparty/parsers/avidpay/__init__.py +0 -0
- counterparty/parsers/avidpay/avidp_check_parser.py +82 -0
- counterparty/parsers/avidpay/avidp_gen_parser.py +59 -0
- counterparty/parsers/directdebit/__init__.py +0 -0
- counterparty/parsers/directdebit/directdeb.py +80 -0
- counterparty/parsers/disbursement/__init__.py +0 -0
- counterparty/parsers/disbursement/disb_parser.py +72 -0
- counterparty/parsers/fundsTransfer/__init__.py +0 -0
- counterparty/parsers/fundsTransfer/fundsTrans_parser.py +80 -0
- counterparty/parsers/generic/__init__.py +0 -0
- counterparty/parsers/generic/all_parser.py +91 -0
- counterparty/parsers/merchref/__init__.py +0 -0
- counterparty/parsers/merchref/merch_ref_parser.py +47 -0
- counterparty/parsers/misc/__init__.py +0 -0
- counterparty/parsers/misc/cardp.py +61 -0
- counterparty/parsers/misc/invo.py +78 -0
- counterparty/parsers/misc/webt.py +55 -0
- counterparty/parsers/paypal/__init__.py +0 -0
- counterparty/parsers/paypal/paypal.py +118 -0
- counterparty/parsers/processor_eft/__init__.py +0 -0
- counterparty/parsers/processor_eft/peft.py +110 -0
- counterparty/parsers/remittance/__init__.py +0 -0
- counterparty/parsers/remittance/remi.py +79 -0
- counterparty/parsers/swift/__init__.py +0 -0
- counterparty/parsers/swift/swift_parser.py +97 -0
- counterparty/parsers/vendorpay/__init__.py +0 -0
- counterparty/parsers/vendorpay/vp_parser.py +54 -0
- counterparty/parsers/vendorpymt/__init__.py +0 -0
- counterparty/parsers/vendorpymt/vpymt_parser.py +132 -0
- counterparty/parsers/wire/__init__.py +0 -0
- counterparty/parsers/wire/wire_parser.py +137 -0
- counterparty/route.py +116 -0
- counterparty/routines.py +72 -0
- counterparty/util.py +40 -0
- counterparty-0.1.6.dist-info/METADATA +9 -0
- counterparty-0.1.6.dist-info/RECORD +64 -0
- counterparty-0.1.6.dist-info/WHEEL +5 -0
- counterparty-0.1.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def normalize_narrative(line: str) -> str:
    """Return *line* in canonical matching form.

    Leading commas and any trailing run of commas/whitespace are removed,
    internal whitespace runs collapse to one space, and the result is
    upper-cased. Falsy input yields ``""``.
    """
    if not line:
        return ""
    # Trim the edges first, then squeeze the interior.
    trimmed = re.sub(r"[,\s]+$", "", line.lstrip(","))
    squeezed = re.sub(r"\s+", " ", trimmed)
    return squeezed.strip().upper()
|
|
10
|
+
|
|
11
|
+
# Whole-string shape: a lazily matched name (letters, digits, space . & ' -),
# one token of 2+ letters/digits/hyphens, then two digit runs of 6+ digits each.
MERCHANT_REF_SIMPLE_RE = re.compile(
    r"^[A-Z0-9 .&'-]+?\s+[A-Z0-9-]{2,}\s+\d{6,}\s+\d{6,}$"
)
|
|
14
|
+
|
|
15
|
+
# Whole-string shape (verbose mode): a digit-free name, one or more
# alphanumeric tokens of 2+ chars, a digit-free phrase of 3+ chars, and a
# final alphanumeric token of 6+ chars.
MERCHANT_REF_CONTEXT_RE = re.compile(
    r"""
    ^
    [A-Z .&'-]+
    (?:\s+[A-Z0-9]{2,})+
    \s+[A-Z .&]{3,}
    \s+[A-Z0-9]{6,}
    $
    """,
    re.VERBOSE
)
|
|
26
|
+
|
|
27
|
+
def is_merchant_reference(line: str) -> bool:
    """Report whether *line* matches either merchant-reference layout.

    The line is normalized first; both patterns are applied with ``match``.
    """
    candidate = normalize_narrative(line)
    for pattern in (MERCHANT_REF_SIMPLE_RE, MERCHANT_REF_CONTEXT_RE):
        if pattern.match(candidate):
            return True
    return False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def parse_merchant_reference(line: str) -> dict:
    """Split a merchant-reference narrative into entity and reference block.

    The normalized line is split on spaces; the last three tokens form
    ``REFERENCE_BLOCK`` and everything before them forms ``ENTITY``.
    ``RAW`` carries the untouched input.
    """
    tokens = normalize_narrative(line).split()
    entity_tokens = tokens[:-3]
    reference_tokens = tokens[-3:]

    result = {"RAW": line, "FORMAT": "MERCHANT_REFERENCE"}
    result["ENTITY"] = " ".join(entity_tokens)
    result["REFERENCE_BLOCK"] = " ".join(reference_tokens)
    return result
|
|
46
|
+
|
|
47
|
+
|
|
File without changes
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def normalize_narrative(line: str) -> str:
    """Canonicalize a narrative line for pattern matching.

    Drops leading commas and trailing commas/whitespace, squeezes whitespace
    runs to a single space, then upper-cases. Falsy input returns ``""``.
    """
    if not line:
        return ""
    without_edges = re.sub(r"[,\s]+$", "", line.lstrip(","))
    single_spaced = re.sub(r"\s+", " ", without_edges)
    return single_spaced.strip().upper()
|
|
10
|
+
|
|
11
|
+
# Recognizer (verbose mode): a name made of letters, digits, space . & ' -,
# then "ONLINE PMT" or "POS PMT", then a reference of 8+ digits ending the line.
CARD_PAYMENT_RECOGNISE_RE = re.compile(
    r"""
    ^[A-Z0-9 .&'-]+
    \s+
    (ONLINE|POS)\s+PMT
    \s+
    \d{8,}$
    """,
    re.VERBOSE
)
|
|
21
|
+
|
|
22
|
+
def is_card_payment(line: str) -> bool:
    """Report whether *line* looks like an ONLINE/POS card-payment narrative.

    The line is normalized, screened against markers of other payment rails,
    then tested against ``CARD_PAYMENT_RECOGNISE_RE``.

    Short rail codes (ACH, WIRE, RMR, TFR, REF) are matched as whole words.
    A plain substring test rejected legitimate merchants whose names merely
    contain those letters (COACH, WIRELESS, ...). Longer markers keep the
    substring test so variants such as DISBURSEMENT are still caught.
    """
    norm = normalize_narrative(line)
    if not norm:
        return False

    # hard guard: exclude known rails / processors
    if any(marker in norm for marker in ("VAID", "VENDOR", "AVIDPAY", "DISBURSE")):
        return False
    # "REF " keeps its trailing space; it now also needs a word start.
    if re.search(r"\b(?:ACH|WIRE|RMR|TFR)\b|\bREF ", norm):
        return False

    return bool(CARD_PAYMENT_RECOGNISE_RE.search(norm))
|
|
34
|
+
|
|
35
|
+
# Parser (verbose mode): captures the lazily matched merchant name, the
# channel word before "PMT" (ONLINE or POS) and the trailing digit reference.
CARD_PAYMENT_PARSE_RE = re.compile(
    r"""
    ^
    (?P<MERCHANT_NAME>.+?)\s+
    (?P<CHANNEL>ONLINE|POS)\s+PMT
    \s+
    (?P<REFERENCE_ID>\d+)
    $
    """,
    re.VERBOSE
)
|
|
46
|
+
|
|
47
|
+
def parse_card_payment(line: str) -> dict:
    """Parse a card-payment narrative into merchant, channel and reference.

    Returns a dict with ``RAW``, ``FORMAT``, ``ENTITY``, ``CHANNEL``,
    ``REFERENCE_ID`` and ``META`` on success. When the line does not match,
    the result carries ``RAW``, ``META`` and an ``ERROR`` marker, so a failure
    is recognizable and still holds the original input. ``META`` is present
    in both cases.
    """
    norm = normalize_narrative(line)

    m = CARD_PAYMENT_PARSE_RE.search(norm)
    if not m:
        return {
            "RAW": line,
            "META": norm,
            "ERROR": "CARD_PAYMENT_PARSE_FAILED",
        }

    return {
        "RAW": line,
        "FORMAT": "CARD_PAYMENT",
        "ENTITY": m.group("MERCHANT_NAME"),
        "CHANNEL": m.group("CHANNEL"),
        "REFERENCE_ID": m.group("REFERENCE_ID"),
        "META": norm,
    }
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def normalize_narrative(line: str) -> str:
    """Normalize a narrative: trim edge commas, squeeze whitespace, upper-case.

    Falsy input returns ``""``.
    """
    if not line:
        return ""
    body = line.lstrip(",")
    body = re.sub(r"[,\s]+$", "", body)  # trailing commas and whitespace
    body = re.sub(r"\s+", " ", body)     # interior whitespace runs
    body = body.strip()
    return body.upper()
|
|
10
|
+
|
|
11
|
+
# Slash layout: some text, "/INVOICE", a word-character token, then more text.
# The pattern is unanchored.
INVOICE_REFERENCE_RECOGNISE_RE = re.compile(
    r".+\/INVOICE\s+\w+\s+.+"
)
|
|
14
|
+
|
|
15
|
+
# Alternate layout: "<entity> INVOICE <6-digit date> <alphanumeric number>".
# The number is a single `[A-Z0-9]+` run. A second optional run of the same
# class matched nothing new and only added backtracking on non-matching input.
INVOICE_ALT_RECOGNISE_RE = re.compile(
    r"^.+?\s+INVOICE\s+\d{6}\s+[A-Z0-9]+$",
    re.IGNORECASE
)
|
|
19
|
+
|
|
20
|
+
def is_invoice_reference(line: str) -> bool:
    """Report whether *line* matches the slash or the alternate invoice layout.

    An empty normalized line is never an invoice reference.
    """
    norm = normalize_narrative(line)
    if not norm:
        return False
    recognisers = (INVOICE_REFERENCE_RECOGNISE_RE, INVOICE_ALT_RECOGNISE_RE)
    return any(pattern.search(norm) is not None for pattern in recognisers)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Slash-layout parser (verbose mode): entity text up to "/INVOICE", an
# alphanumeric invoice number, then the rest of the line as SERVICE_PROVIDER.
INVOICE_REFERENCE_PARSE_RE = re.compile(
    r"""
    ^
    (?P<ENTITY_NAME>.+?)\/
    INVOICE\s+
    (?P<INVOICE_NO>[A-Z0-9]+)
    \s+
    (?P<SERVICE_PROVIDER>.+)
    $
    """,
    re.VERBOSE
)
|
|
39
|
+
# Alternate-layout parser: "<entity> INVOICE <6-digit date> <invoice number>".
# INVOICE_NO is a single `[A-Z0-9]+` run. A second optional run of the same
# class captured nothing extra and only added backtracking.
INVOICE_ALT_PARSE_RE = re.compile(
    r"^(?P<ENTITY_NAME>.+?)\s+INVOICE\s+(?P<DATE>\d{6})\s+(?P<INVOICE_NO>[A-Z0-9]+)$",
    re.IGNORECASE
)
|
|
43
|
+
|
|
44
|
+
def parse_invoice_reference(line: str) -> dict:
    """Parse an invoice narrative using the slash layout, then the alternate one.

    Both layouts yield ``RAW``, ``FORMAT``, ``ENTITY``, ``DOCUMENT_TYPE``,
    ``INVOICE_NO`` and ``META``. The slash layout adds ``SERVICE_PROVIDER``
    and the alternate layout adds ``DATE``. When neither matches, the result
    holds ``RAW``, ``META`` and ``ERROR``.
    """
    norm = normalize_narrative(line)

    # The slash layout wins; the alternate layout is tried only if it fails.
    hit = INVOICE_REFERENCE_PARSE_RE.search(norm)
    extra_field = "SERVICE_PROVIDER"
    if hit is None:
        hit = INVOICE_ALT_PARSE_RE.search(norm)
        extra_field = "DATE"

    if hit is None:
        return {
            "RAW": line,
            "META": norm,
            "ERROR": "INVOICE_PARSE_FAILED"
        }

    # The layout-specific field sits between INVOICE_NO and META.
    return {
        "RAW": line,
        "FORMAT": "INVOICE_REFERENCE",
        "ENTITY": hit.group("ENTITY_NAME"),
        "DOCUMENT_TYPE": "INVOICE",
        "INVOICE_NO": hit.group("INVOICE_NO"),
        extra_field: hit.group(extra_field),
        "META": norm
    }
|
|
78
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def normalize_narrative(line: str) -> str:
    """Return the upper-cased, single-spaced form of *line*.

    Leading commas and trailing commas/whitespace are removed. Falsy input
    returns ``""``.
    """
    if not line:
        return ""
    core = re.sub(r"[,\s]+$", "", line.lstrip(","))
    return re.sub(r"\s+", " ", core).strip().upper()
|
|
10
|
+
|
|
11
|
+
# Recognizer: the words "WEB TFR FR" followed by digits, at any position.
WEB_TRANSFER_RECOGNISE_RE = re.compile(
    r"\bWEB\s+TFR\s+FR\s+\d+"
)
|
|
14
|
+
|
|
15
|
+
def is_web_transfer(line: str) -> bool:
    """Report whether the normalized *line* contains a "WEB TFR FR <digits>" marker."""
    norm = normalize_narrative(line)
    if norm:
        return WEB_TRANSFER_RECOGNISE_RE.search(norm) is not None
    return False
|
|
20
|
+
|
|
21
|
+
# Parser (verbose mode): "WEB TFR FR <source ref> <description> <trailing refs>".
# TRAILING_REFS is written as digit runs joined by a single comma/whitespace
# separator, with one optional trailing separator. This accepts the same text
# as a repeated `\d+[,\s]?` group without the exponential backtracking that
# nested quantifier causes on long digit runs that fail to match.
WEB_TRANSFER_PARSE_RE = re.compile(
    r"""
    ^
    WEB\s+TFR\s+FR\s+
    (?P<SOURCE_REF>\d+)
    \s+
    (?P<DESCRIPTION>.+?)
    \s+
    (?P<TRAILING_REFS>\d+(?:[,\s]\d+)*[,\s]?)
    $
    """,
    re.VERBOSE
)
|
|
34
|
+
|
|
35
|
+
def parse_web_transfer(line: str) -> dict:
    """Parse a "WEB TFR FR" narrative into source ref, description and refs.

    On a match the result holds ``RAW``, ``FORMAT``, ``CHANNEL``,
    ``SOURCE_REF``, ``DESCRIPTION``, ``REFERENCE_IDS`` (every digit run in the
    trailing block) and ``META``. Without a match only ``RAW`` and ``META``
    are returned.
    """
    norm = normalize_narrative(line)
    parsed = WEB_TRANSFER_PARSE_RE.search(norm)

    if parsed is None:
        return {"RAW": line, "META": norm}

    fields = parsed.groupdict()
    reference_ids = re.findall(r"\d+", fields["TRAILING_REFS"])

    return {
        "RAW": line,
        "FORMAT": "WEB_TRANSFER",
        "CHANNEL": "WEB",
        "SOURCE_REF": fields["SOURCE_REF"],
        "DESCRIPTION": fields["DESCRIPTION"].strip(),
        "REFERENCE_IDS": reference_ids,
        "META": norm
    }
|
|
File without changes
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# NORMALIZATION
|
|
6
|
+
|
|
7
|
+
def normalize_narrative(line: str) -> str:
    """Canonicalize a PayPal narrative line.

    Strips a leading run of commas/pipes and a trailing run of
    backslashes/pipes/commas, squeezes whitespace runs to one space, trims,
    and upper-cases. Falsy input returns ``""``.
    """
    if not line:
        return ""
    # Applied in order: leading delimiters, trailing delimiters, whitespace runs.
    substitutions = (
        (r"^[,|]+", ""),
        (r"[\\|,]+$", ""),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        line = re.sub(pattern, replacement, line)
    return line.strip().upper()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# PAYPAL RECOGNIZERS
|
|
18
|
+
|
|
19
|
+
# Matches the whole words PAYPAL, RDC and "DEP CR" appearing in that order,
# with any text in between.
PAYPAL_RDC_RE = re.compile(
    r"\bPAYPAL\b.*\bRDC\b.*\bDEP\s+CR\b",
    re.IGNORECASE
)
|
|
23
|
+
|
|
24
|
+
# Verbose-mode, unanchored pattern:
# "PAYPAL BANKBK IBTRANSFER ACH RTN - <d/m/yyyy-shaped date> - <trace block>".
# TRACE_BLOCK is a run of digits, whitespace and hyphens.
PAYPAL_ACH_RETURN_PARSE_RE = re.compile(
    r"""
    PAYPAL\s+
    BANKBK\s+
    IBTRANSFER\s+
    ACH\s+RTN
    \s*-\s*
    (?P<RETURN_DATE>\d{1,2}/\d{1,2}/\d{4})
    \s*-\s*
    (?P<TRACE_BLOCK>[\d\s\-]+)
    """,
    re.VERBOSE
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# CLASSIFIER
|
|
41
|
+
|
|
42
|
+
def classify_paypal(line: str) -> str | None:
    """Classify a narrative as a PayPal ACH return, RDC deposit or other.

    Returns ``None`` when the normalized line does not contain "PAYPAL".
    The ACH-return pattern is checked before the RDC pattern.
    """
    norm = normalize_narrative(line)
    if "PAYPAL" not in norm:
        return None

    # First matching pattern decides the label.
    ordered_checks = (
        ("PAYPAL_ACH_RETURN", PAYPAL_ACH_RETURN_PARSE_RE),
        ("PAYPAL_RDC_DEPOSIT", PAYPAL_RDC_RE),
    )
    for label, pattern in ordered_checks:
        if pattern.search(norm):
            return label
    return "PAYPAL_OTHER"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# PARSERS
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_paypal_rdc(line: str) -> dict:
    """Build the record for a PayPal RDC deposit narrative.

    ``deposit_code`` is the first standalone 6-digit token in the normalized
    line, or ``None`` when there is none. The other fields are fixed labels.
    """
    norm = normalize_narrative(line)

    # find 6-digit token for deposit code
    code_match = re.search(r"\b(\d{6})\b", norm)
    deposit_code = code_match.group(1) if code_match else None

    return {
        "RAW": line,
        "FORMAT": "PAYPAL_RDC_DEPOSIT",
        "PROCESSOR": "PAYPAL",
        "RAIL": "RDC",
        "DIRECTION": "CREDIT",
        "deposit_code": deposit_code,
    }
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def parse_paypal_ach_return(line: str) -> dict:
    """Parse a PayPal ACH-return narrative into return date and trace numbers.

    On a match the result holds ``RAW``, ``TRANS_TYPE``, ``RETURN_DATE``,
    ``TRACE_NUMBERS`` (each digit run of the trace block) and
    ``RAW_TRACE_BLOCK``. Otherwise it holds ``RAW``, ``META`` and ``ERROR``.
    """
    norm = normalize_narrative(line)
    hit = PAYPAL_ACH_RETURN_PARSE_RE.search(norm)

    if hit is None:
        return {
            "RAW": line,
            "META": norm,
            "ERROR": "PAYPAL_ACH_RETURN_PARSE_FAILED"
        }

    trace_block = hit.group("TRACE_BLOCK")
    return {
        "RAW": line,
        "TRANS_TYPE": "PAYPAL_ACH_RETURN",
        "RETURN_DATE": hit.group("RETURN_DATE"),
        "TRACE_NUMBERS": re.findall(r"\d+", trace_block),
        "RAW_TRACE_BLOCK": trace_block,
    }
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def parse_paypal(line: str) -> dict | None:
    """Classify *line* and dispatch to the matching PayPal parser.

    Returns ``None`` for non-PayPal lines. Lines classified as
    ``PAYPAL_OTHER`` get a minimal record with ``RAW`` and ``FORMAT``.
    """
    kind = classify_paypal(line)

    if kind == "PAYPAL_OTHER":
        return {
            "RAW": line,
            "FORMAT": "PAYPAL_OTHER",
        }
    if kind == "PAYPAL_ACH_RETURN":
        return parse_paypal_ach_return(line)
    if kind == "PAYPAL_RDC_DEPOSIT":
        return parse_paypal_rdc(line)
    return None
|
|
118
|
+
|
|
File without changes
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
# NORMALIZATION
|
|
4
|
+
def normalize_narrative(line: str) -> str:
    """Canonicalize a narrative line for processor-EFT matching.

    Removes leading commas, squeezes whitespace runs to one space, trims and
    upper-cases. Trailing commas are kept. Falsy input returns ``""``.
    """
    if not line:
        return ""
    single_spaced = re.sub(r"\s+", " ", line.lstrip(","))
    return single_spaced.strip().upper()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# PROCESSOR EFT REGEX (TIGHTENED)
|
|
14
|
+
# Whole-line structure (verbose mode): processor name, whitespace, a 2-6
# letter code with an optional 3-9 digit batch attached directly to it, an
# optional date (six digits or NN-NN-NN), and an optional colon-free remainder
# captured as refs.
PROCESSOR_EFT_RECOGNISE_RE = re.compile(
    r"""
    ^
    (?P<proc>[A-Z][A-Z0-9 .,&'-]{0,60}) # processor name
    \s+
    (?P<code>[A-Z]{2,6}) # processor code (min 2 chars)
    (?P<batch>\d{3,9})? # optional batch / id
    (?:
    \s+(?P<date>\d{6}) | # YYMMDD
    \s+(?P<date_dash>\d{2}-\d{2}-\d{2})
    )?
    (?:\s+(?P<refs>[^:]+))? # refs, no colon allowed
    $
    """,
    re.VERBOSE
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Regex fragments for bank names, in spaced and unspaced spellings.
_BANK_NAME_PATTERNS = [
    r"PNC",
    r"CHASE",
    r"CITI",
    r"BANK\s+OF\s+AMERICA",
    r"BOFA",
    r"WELLS\s+FARGO",
    r"CAPITAL\s+ONE",
    r"U\.S\.?BANK",
    r"US\s+BANK",
    r"TDBANK",
    r"TD\s+BANK"
]
# Matches when the text starts with one of the bank names as a whole word.
_BANK_NAME_RE = re.compile(r"^(?:" + r"|".join(_BANK_NAME_PATTERNS) + r")\b")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# PEFT DETECTION (HARDENED)
|
|
50
|
+
def is_processor_eft(line: str) -> bool:
    """Report whether *line* has the processor-EFT structure.

    A line is rejected when it is empty, contains a colon, contains any
    disqualifying keyword, or is an INTEREST line that starts with a bank
    name. Survivors must fully match ``PROCESSOR_EFT_RECOGNISE_RE``.
    """
    norm = normalize_narrative(line)
    if not norm or ":" in norm:
        return False

    # Whole-word keywords that rule the line out as a processor EFT.
    disqualifiers = (
        r"\b(NAME|RECEIVER|SENDER)\b",
        r"\b(COMP\s+ID|COMPANY\s+ID|MERCHANT)\b",
        r"\bREMOTE\s+DEPOSIT\b",
        r"\bMOBILE\s+DEPOSIT\b",
        r"\bFUNDS\s+TRANSFER\b",
        r"\bFRMDEP\b",
        r"\b(ACH|WIRE|FED|RDC|CARD)\b",
    )
    for pattern in disqualifiers:
        if re.search(pattern, norm):
            return False

    if "INTEREST" in norm and _BANK_NAME_RE.match(norm):
        return False

    # ---- FINAL STRUCTURAL CHECK ----
    return PROCESSOR_EFT_RECOGNISE_RE.fullmatch(norm) is not None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# PEFT PARSER
|
|
88
|
+
def parse_processor_eft(line: str) -> dict:
    """Parse a processor-EFT narrative into its structural fields.

    On a full match the result holds ``RAW``, ``TRANS_TYPE``, ``ENTITY``,
    ``PROCESSOR_CODE``, ``BATCH_ID``, ``DATE`` (six-digit form, else dashed
    form) and ``REFERENCE_IDS`` (the refs block split on commas/whitespace).
    Otherwise it holds ``RAW``, ``META`` and ``ERROR``.
    """
    norm = normalize_narrative(line)
    hit = PROCESSOR_EFT_RECOGNISE_RE.fullmatch(norm)

    if hit is None:
        return {
            "RAW": line,
            "META": norm,
            "ERROR": "PROCESSOR_EFT_PARSE_FAILED"
        }

    parts = hit.groupdict()
    raw_refs = parts["refs"] or ""
    reference_ids = [token for token in re.split(r"[\s,]+", raw_refs) if token]

    return {
        "RAW": line,
        "TRANS_TYPE": "PROCESSOR_EFT",
        "ENTITY": parts["proc"].strip(),
        "PROCESSOR_CODE": parts["code"],
        "BATCH_ID": parts["batch"],
        "DATE": parts["date"] or parts["date_dash"],
        "REFERENCE_IDS": reference_ids,
    }
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def normalize_narrative(line: str) -> str:
    """Canonicalize a remittance narrative line.

    Removes leading commas and trailing commas/whitespace, squeezes
    whitespace runs to one space, trims and upper-cases. Falsy input
    returns ``""``.
    """
    if not line:
        return ""

    edge_trimmed = re.sub(r"[,\s]+$", "", line.lstrip(","))
    single_spaced = re.sub(r"\s+", " ", edge_trimmed)

    return single_spaced.strip().upper()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# -------------------------------
|
|
16
|
+
# Recognition
|
|
17
|
+
# -------------------------------
|
|
18
|
+
def is_remittance_advice(line: str) -> bool:
    """Report whether the normalized *line* contains both "TRN*" and "RMR*"."""
    norm = normalize_narrative(line)
    if not norm:
        return False

    return all(tag in norm for tag in ("TRN*", "RMR*"))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# -------------------------------
|
|
27
|
+
# Regexes
|
|
28
|
+
# -------------------------------
|
|
29
|
+
# Verbose-mode layout: optional prefix text, "TRN*<digits>*<reference>", a
# literal backslash, then everything from "RMR*" to the end of the line.
# TRN_REF stops at the first backslash.
REMITTANCE_PARSE_RE = re.compile(
    r"""
    ^
    (?P<PREFIX>.*?)
    \s*
    TRN\*(?P<TRN_SEQ>\d+)\*(?P<TRN_REF>[^\\]+)
    \\
    (?P<RMR_SEGMENT>RMR\*.+)
    $
    """,
    re.VERBOSE,
)
|
|
41
|
+
|
|
42
|
+
# Captures the second "*"-separated field after "RMR", up to the next backslash.
RMR_REF_RE = re.compile(r"RMR\*[^*]*\*([^\\]+)")
# Standalone alphanumeric tokens of 8 or more characters.
ID_TOKEN_RE = re.compile(r"\b[A-Z0-9]{8,}\b")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# -------------------------------
|
|
47
|
+
# Parser
|
|
48
|
+
# -------------------------------
|
|
49
|
+
def parse_remittance_advice(line: str) -> dict:
    """Parse a TRN*/RMR* remittance fragment.

    On a match the result holds ``RAW``, ``FORMAT``, ``TRANS_TYPE``,
    ``ENTITY`` (the field captured by ``RMR_REF_RE`` in the RMR segment, or
    ``None``), ``TRN_SEQUENCE``, ``TRN_REFERENCE``, ``RMR_RAW`` and
    ``REMITTANCE_REFS``. Otherwise it holds ``RAW``, ``FORMAT``, ``META``
    and ``ERROR``.

    ``REMITTANCE_REFS`` is de-duplicated in first-seen order, so the same
    input gives the same list on every run. A ``set`` round-trip made the
    order depend on string hashing.
    """
    norm = normalize_narrative(line)

    m = REMITTANCE_PARSE_RE.search(norm)
    if not m:
        return {
            "RAW": line,
            "FORMAT": "REMITTANCE_ADVICE",
            "META": norm,
            "ERROR": "REMITTANCE_PARSE_FAILED",
        }

    rmr_raw = m.group("RMR_SEGMENT")

    # operative entity comes from RMR*
    cp_match = RMR_REF_RE.search(rmr_raw)
    entity = cp_match.group(1).strip() if cp_match else None

    # opaque references (no semantic guessing); dict keys keep insertion order
    refs = list(dict.fromkeys(ID_TOKEN_RE.findall(norm)))

    return {
        "RAW": line,
        "FORMAT": "REMITTANCE_ADVICE",
        "TRANS_TYPE": "EDI_REMITTANCE_FRAGMENT",
        "ENTITY": entity,
        "TRN_SEQUENCE": m.group("TRN_SEQ"),
        "TRN_REFERENCE": m.group("TRN_REF"),
        "RMR_RAW": rmr_raw,
        "REMITTANCE_REFS": refs,
    }
|
|
File without changes
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from counterparty.key_engine.keys import KEYS, INLINE_KEYS
|
|
3
|
+
from counterparty.util import norm
|
|
4
|
+
|
|
5
|
+
# Longest keys first, so the key scan sees a long key before any shorter key.
# Equal-length keys are ordered alphabetically: after the set() de-duplication
# their order would otherwise follow set iteration order, which can change
# between interpreter runs.
# NOTE(review): assumes every entry of KEYS is a str — confirm against keys.py.
KEYS = sorted(set(KEYS), key=lambda k: (-len(k), k))
INLINE_KEYS = sorted(INLINE_KEYS, key=len, reverse=True)

# Characters that may sit directly before or after a key occurrence.
ALLOWED = {' ', ':', '=', ',', '/', '\\', '_', '#', '-'}

# SWIFT markers: UETR / SWIFT / S.W.I.F.T / MT1xx, MT2xx, MT9xx as whole
# words, or a ":NN:" / ":NNA:" tag (two digits and an optional letter between colons).
SWIFT_RE = re.compile(r"\b(UETR|SWIFT|S\.W\.I\.F\.T|MT(1\d{2}|2\d{2}|9\d{2}))\b|:\d{2}[A-Z]?:", re.I)
|
|
11
|
+
|
|
12
|
+
def is_swift(text: str) -> bool:
    """Report whether the normalized *text* contains a SWIFT marker."""
    cleaned = norm(text)
    if not cleaned:
        return False
    return SWIFT_RE.search(cleaned) is not None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def is_standalone(text, i, k_len):
    """Report whether the span ``text[i:i + k_len]`` is delimited on both sides.

    The characters just before and just after the span must be in
    ``ALLOWED``. The start and end of *text* count as a space.
    """
    stop = i + k_len
    left = ' ' if i <= 0 else text[i - 1]
    right = ' ' if stop >= len(text) else text[stop]
    if left not in ALLOWED:
        return False
    return right in ALLOWED
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def find_keys(text, key_list):
    """Locate standalone occurrences of keys in *text*.

    Keys are tried in the order given by *key_list*. An occurrence is
    accepted when ``is_standalone`` approves it and its span does not overlap
    a span already claimed by an earlier key.

    The overlap test compares whole spans. Testing only whether the start
    falls inside a claimed span let a key that begins before a claimed span
    and runs into it be accepted as well. Empty keys are skipped: they carry
    no text to locate.

    Returns a dict mapping start index to key, ordered by start index.
    """
    found = {}
    reserved = []

    for k in key_list:
        k_len = len(k)
        if k_len == 0:
            continue

        i = text.find(k)
        while i != -1:
            end = i + k_len
            # Half-open spans [i, end) and [s, e) overlap iff i < e and s < end.
            overlaps = any(i < e and s < end for s, e in reserved)
            if not overlaps and is_standalone(text, i, k_len):
                found[i] = k
                reserved.append((i, end))
            # Advance by one so self-overlapping occurrences are still examined.
            i = text.find(k, i + 1)

    return dict(sorted(found.items()))
|
|
41
|
+
|
|
42
|
+
# V2 — INLINE KEY SPLITTER
|
|
43
|
+
def split_inline_keys(text: str):
    """Split *text* at inline keys into a ``{"value": ..., key: ...}`` dict.

    When no inline key is found, the stripped text is returned as a string.
    Otherwise ``"value"`` holds the text before the first key, and each key
    maps to the text between it and the next key, with leading
    non-alphanumeric characters skipped.
    """
    marks = find_keys(text, INLINE_KEYS)
    if not marks:
        return text.strip()

    starts = list(marks)
    # Each segment ends where the next key begins; the last one ends the text.
    ends = starts[1:] + [len(text)]
    out = {"value": text[:starts[0]].strip()}

    for start, end in zip(starts, ends):
        key = marks[start]
        cursor = start + len(key)
        # Skip separators such as ":" or "=" that follow the key.
        while cursor < len(text) and not text[cursor].isalnum():
            cursor += 1
        out[key] = text[cursor:end].strip()
    return out
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def swift_parser_v2(v1_output: dict):
    """Apply ``split_inline_keys`` to every string value of *v1_output*.

    Non-string values are passed through unchanged. Returns a new dict.
    """
    return {
        field: split_inline_keys(value) if isinstance(value, str) else value
        for field, value in v1_output.items()
    }
|
|
70
|
+
|
|
71
|
+
# V1 — swift PARSER
|
|
72
|
+
def swift_parser_v1(narr: str):
    """Split *narr* at the top-level keys into a flat dict.

    ``"META"`` holds the text before the first key, or the whole narrative
    when no key is found. Each key maps to the text between it and the next
    key, with leading non-alphanumeric characters skipped.
    """
    marks = find_keys(narr, KEYS)
    starts = list(marks)
    total = len(narr)

    first_key_at = starts[0] if starts else total
    out = {"META": narr[:first_key_at].strip()}

    # Each segment ends where the next key begins; the last one ends the text.
    for start, end in zip(starts, starts[1:] + [total]):
        key = marks[start]
        cursor = start + len(key)
        while cursor < total and not narr[cursor].isalnum():
            cursor += 1
        out[key] = narr[cursor:end].strip()

    return out
|
|
86
|
+
|
|
87
|
+
# PIPELINE
|
|
88
|
+
def swift_parser(narr: str):
    """Run the two-stage SWIFT parse and return the result with ``RAW`` first.

    Stage one splits on top-level keys, stage two splits each string value on
    inline keys. Keys from the parse are merged after ``RAW``.
    """
    staged = swift_parser_v2(swift_parser_v1(narr))
    result = {"RAW": narr}
    result.update(staged)
    return result
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
|
|
97
|
+
pass
|
|
File without changes
|