counterparty 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- counterparty/__init__.py +29 -0
- counterparty/extraction/__init__.py +0 -0
- counterparty/extraction/clean.py +124 -0
- counterparty/extraction/extract_payer_payee.py +160 -0
- counterparty/extraction/infer_counterparty.py +17 -0
- counterparty/key_engine/__init__.py +0 -0
- counterparty/key_engine/canonical_keys.json +326 -0
- counterparty/key_engine/key_detector.py +335 -0
- counterparty/key_engine/keys.py +332 -0
- counterparty/parsers/LAT_AM/LAT_AM_Entry.py +91 -0
- counterparty/parsers/LAT_AM/__init__.py +0 -0
- counterparty/parsers/LAT_AM/pattern1.py +169 -0
- counterparty/parsers/LAT_AM/pattern10.py +76 -0
- counterparty/parsers/LAT_AM/pattern11.py +76 -0
- counterparty/parsers/LAT_AM/pattern12.py +99 -0
- counterparty/parsers/LAT_AM/pattern2.py +102 -0
- counterparty/parsers/LAT_AM/pattern3.py +75 -0
- counterparty/parsers/LAT_AM/pattern4.py +128 -0
- counterparty/parsers/LAT_AM/pattern5.py +54 -0
- counterparty/parsers/LAT_AM/pattern6.py +141 -0
- counterparty/parsers/LAT_AM/pattern7.py +116 -0
- counterparty/parsers/LAT_AM/pattern8.py +134 -0
- counterparty/parsers/LAT_AM/pattern9.py +86 -0
- counterparty/parsers/__init__.py +0 -0
- counterparty/parsers/ach/__init__.py +0 -0
- counterparty/parsers/ach/ach_parser.py +190 -0
- counterparty/parsers/avidpay/__init__.py +0 -0
- counterparty/parsers/avidpay/avidp_check_parser.py +82 -0
- counterparty/parsers/avidpay/avidp_gen_parser.py +59 -0
- counterparty/parsers/directdebit/__init__.py +0 -0
- counterparty/parsers/directdebit/directdeb.py +80 -0
- counterparty/parsers/disbursement/__init__.py +0 -0
- counterparty/parsers/disbursement/disb_parser.py +72 -0
- counterparty/parsers/fundsTransfer/__init__.py +0 -0
- counterparty/parsers/fundsTransfer/fundsTrans_parser.py +80 -0
- counterparty/parsers/generic/__init__.py +0 -0
- counterparty/parsers/generic/all_parser.py +91 -0
- counterparty/parsers/merchref/__init__.py +0 -0
- counterparty/parsers/merchref/merch_ref_parser.py +47 -0
- counterparty/parsers/misc/__init__.py +0 -0
- counterparty/parsers/misc/cardp.py +61 -0
- counterparty/parsers/misc/invo.py +78 -0
- counterparty/parsers/misc/webt.py +55 -0
- counterparty/parsers/paypal/__init__.py +0 -0
- counterparty/parsers/paypal/paypal.py +118 -0
- counterparty/parsers/processor_eft/__init__.py +0 -0
- counterparty/parsers/processor_eft/peft.py +110 -0
- counterparty/parsers/remittance/__init__.py +0 -0
- counterparty/parsers/remittance/remi.py +79 -0
- counterparty/parsers/swift/__init__.py +0 -0
- counterparty/parsers/swift/swift_parser.py +97 -0
- counterparty/parsers/vendorpay/__init__.py +0 -0
- counterparty/parsers/vendorpay/vp_parser.py +54 -0
- counterparty/parsers/vendorpymt/__init__.py +0 -0
- counterparty/parsers/vendorpymt/vpymt_parser.py +132 -0
- counterparty/parsers/wire/__init__.py +0 -0
- counterparty/parsers/wire/wire_parser.py +137 -0
- counterparty/route.py +116 -0
- counterparty/routines.py +72 -0
- counterparty/util.py +40 -0
- counterparty-0.1.6.dist-info/METADATA +9 -0
- counterparty-0.1.6.dist-info/RECORD +64 -0
- counterparty-0.1.6.dist-info/WHEEL +5 -0
- counterparty-0.1.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from counterparty.key_engine.keys import KEYS, INLINE_KEYS
|
|
3
|
+
from counterparty.util import norm
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Longest keys are tried first so that a long key claims its span before any
# shorter key contained in it.  De-duplicating through a set loses the input
# order, and set iteration order for strings changes between interpreter runs
# (hash randomisation), so equal-length keys are tie-broken alphabetically to
# keep the scan order reproducible.
KEYS = sorted(set(KEYS), key=lambda key: (-len(key), key))
# INLINE_KEYS is not de-duplicated; the stable sort keeps the imported order
# among keys of equal length.
INLINE_KEYS = sorted(INLINE_KEYS, key=len, reverse=True)
# Characters that may sit directly before/after a key for it to count as a
# standalone token (see is_standalone).
ALLOWED = {' ', ':', '=', ',', '/', '\\', '_', '#', '-'}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Tokens that identify a narrative as ACH traffic (rail names, trace labels,
# company/entry descriptors and SEC codes).
ACH_RE = re.compile(
    r"\b(ACH|A\.?C\.?H|ODFI|RDFI|TRACE(\s*NO|\s*NUMBER)?|"
    r"COMPANY\s*ID|ENTRY\s*DESC|DISCRETIONARY|"
    r"SEC\s*(CCD|PPD|CTX|WEB|TEL|POP)|CCD|PPD|CTX|WEB)\b",
    re.IGNORECASE,
)

# Tokens belonging to other rails (wire, SWIFT, card, check, ...); their
# presence vetoes an ACH classification in is_ach.
ACH_EXCLUDE_RE = re.compile(
    r"\b(WIRE|FEDWIRE|FED\s*REF|IMAD|OMAD|UETR|MT\d{3}|"
    r"SWIFT|IBAN|ORG=|OBK=|IBK=|BBK=|BNF=|CARD|CHECK|RDC)\b",
    re.IGNORECASE,
)

# Phrases that mark an ACH return / reversal entry.
ACH_RETURN_RE = re.compile(
    r"\b(ACH\s*RTN|ACH\s*RETURN|RETURNED\s*ACH|ACH\s*REVERSAL)\b",
    re.IGNORECASE,
)

# Reason phrases that may accompany an ACH return.
ACH_RETURN_REASON_RE = re.compile(
    r"\b(NOT\s*AUTHORIZED|UNAUTHORIZED|CUSTOMER\s*ADVISES|"
    r"DISPUTE|REVERSAL)\b",
    re.IGNORECASE,
)

# Leading run of text that ends in the token "BK" (captured as the bank name).
BANK_PREFIX_RE = re.compile(
    r"^[A-Z][A-Z0-9&.\- ]{3,30}BK\b",
    re.IGNORECASE,
)

# Reference-like tokens: 2-6 letters followed by 5+ digits, or ITD/TRACE
# immediately followed by digits.
REF_RE = re.compile(
    r"\b([A-Z]{2,6}\d{5,}|ITD\d+|TRACE\d+)\b",
    re.IGNORECASE,
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def is_ach(text: str) -> bool:
    """Return True when *text* looks like an ACH narrative.

    The text is first passed through the project's ``norm`` helper.  The
    result must be non-empty, must not contain any token matched by
    ``ACH_EXCLUDE_RE`` and must contain at least one token matched by
    ``ACH_RE``.
    """
    t = norm(text)
    if not t:
        return False
    if ACH_EXCLUDE_RE.search(t):
        return False
    return ACH_RE.search(t) is not None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def ach_return_parser(narr: str):
    """Parse an ACH return / reversal narrative.

    Args:
        narr: Raw narrative text.

    Returns:
        ``None`` when the normalised narrative is empty or carries no ACH
        return marker (``ACH_RETURN_RE``).  Otherwise a dict that always has
        ``RAW``, ``txn_type``, ``rail`` and ``category`` and, when they can be
        found, ``bank``, ``return_reason``, ``reference`` and ``ENTITY``.
    """
    t = norm(narr)
    if not t:
        return None

    # Keep the match object: its end offset marks where the entity name starts.
    rtn = ACH_RETURN_RE.search(t)
    if rtn is None:
        return None

    out = {
        "RAW": narr,
        "txn_type": "ACH_RETURN",
        "rail": "ACH",
        "category": "BANK_ADJUSTMENT",
    }

    m = BANK_PREFIX_RE.search(t)
    if m:
        out["bank"] = m.group().strip()

    m = ACH_RETURN_REASON_RE.search(t)
    if m:
        out["return_reason"] = m.group().upper()

    m = REF_RE.search(t)
    if m:
        out["reference"] = m.group()

    # Everything after the return marker is treated as the entity name, minus
    # "CUSTOMER ADV..." / "ADV..." boilerplate and surrounding punctuation.
    # None of these string operations can fail once the searches above have
    # succeeded, so no blanket exception handler is needed.
    name = t[rtn.end():]
    name = re.sub(r"\b(CUSTOMER\s+ADV\S*|ADV\S*)\b", "", name, flags=re.I)
    name = re.sub(r"\s{2,}", " ", name).strip(" -:,")
    if name:
        out["ENTITY"] = name

    return out
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def is_standalone(text, i, k_len):
    """Tell whether the ``k_len`` characters at ``text[i:]`` form a delimited token.

    The characters immediately before and after the span must both be in
    ``ALLOWED``; the start and end of *text* count as a space.
    """
    stop = i + k_len
    left = ' ' if i <= 0 else text[i - 1]
    right = ' ' if stop >= len(text) else text[stop]
    return left in ALLOWED and right in ALLOWED
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def normalize_key(k: str):
    """Map the known split spellings of "REMARK" to "REMARK"; return other keys unchanged."""
    remark_spellings = ("REMAR K", "R EMARK", "REMA RK", "REMARK")
    return "REMARK" if k in remark_spellings else k
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def find_keys(text, key_list):
    """Locate standalone occurrences of the given keys in *text*.

    Keys are tried in the order of *key_list*.  An occurrence is recorded when
    it is delimited according to ``is_standalone`` and its start offset does
    not fall inside a span already claimed by an earlier hit.

    Returns:
        A dict mapping start offset -> key, ordered by offset.
    """
    hits = {}
    claimed = []

    for key in key_list:
        width = len(key)
        pos = text.find(key)
        while pos != -1:
            if is_standalone(text, pos, width) and not any(
                lo <= pos < hi for lo, hi in claimed
            ):
                hits[pos] = key
                claimed.append((pos, pos + width))
            # Step by one so overlapping occurrences are still examined.
            pos = text.find(key, pos + 1)

    return {pos: hits[pos] for pos in sorted(hits)}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def split_inline_keys(text: str):
    """Split a field value on the inline keys it contains.

    Returns:
        The stripped *text* itself when no inline key is found.  Otherwise a
        dict whose ``"value"`` entry is the text before the first key and
        whose remaining entries map each (normalised) key to the text that
        follows it, up to the next key.
    """
    marks = find_keys(text, INLINE_KEYS)
    if not marks:
        return text.strip()

    starts = list(marks)
    stops = starts[1:] + [len(text)]

    out = {"value": text[:starts[0]].strip()}
    for start, stop in zip(starts, stops):
        raw_k = marks[start]
        # Skip the separator characters between the key and its value.
        z = start + len(raw_k)
        while z < len(text) and not text[z].isalnum():
            z += 1
        out[normalize_key(raw_k)] = text[z:stop].strip()

    return out
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def ach_parser_v2(v1_output: dict):
    """Second pass: run ``split_inline_keys`` over every string value.

    Non-string values are copied through unchanged; key order is preserved.
    """
    return {
        field: split_inline_keys(content) if isinstance(content, str) else content
        for field, content in v1_output.items()
    }
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def ach_parser_v1(narr: str):
    """First pass: cut the narrative into ``{key: value}`` segments on ``KEYS``.

    Text before the first key (or the whole narrative when no key is found)
    is stored under ``"META"``.  Every located key maps to the text between
    it and the next key, with leading separator characters skipped.
    """
    marks = find_keys(narr, KEYS)
    starts = list(marks)
    total = len(narr)

    first_key_at = starts[0] if starts else total
    out = {"META": narr[:first_key_at].strip()}

    for start, stop in zip(starts, starts[1:] + [total]):
        raw_k = marks[start]
        # Move past the key and any non-alphanumeric separators after it.
        z = start + len(raw_k)
        while z < total and not narr[z].isalnum():
            z += 1
        out[normalize_key(raw_k)] = narr[z:stop].strip()

    return out
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def ach_parser(narr: str):
    """Parse an ACH narrative.

    ACH returns are handled by ``ach_return_parser``; when it yields a result
    that result is returned as-is.  Otherwise the narrative goes through the
    two key-splitting passes and the outcome is returned with the original
    text under ``"RAW"``.
    """
    returned = ach_return_parser(narr)
    if returned:
        return returned

    result = {"RAW": narr}
    result.update(ach_parser_v2(ach_parser_v1(narr)))
    return result
|
|
File without changes
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def normalize_narrative(line: str) -> str:
    """Return *line* upper-cased with edge delimiters removed and whitespace collapsed.

    Leading commas/pipes and trailing commas/pipes/backslashes are dropped,
    runs of whitespace become a single space.  Falsy input yields ``""``.
    """
    if not line:
        return ""
    substitutions = (
        (r"^[,\|]+", ""),
        (r"[,\|\\]+$", ""),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        line = re.sub(pattern, replacement, line)
    return line.strip().upper()
|
|
9
|
+
|
|
10
|
+
# "AVIDPAY" at the start or after whitespace/a dash, with no standalone "ACH"
# later in the text, followed somewhere by a REF*CK*<digits>* check reference
# (the asterisks around CK are optional).
AVIDPAY_CHECK_RECOGNISE_RE = re.compile(
    r"(?:^|[\s\-])AVIDPAY(?!.*\bACH\b).*REF\*?CK\*?\d+\*",
)


def is_avidpay_check(line: str) -> bool:
    """Return True when the normalised *line* matches the AvidPay check pattern."""
    norm = normalize_narrative(line)
    if norm:
        return AVIDPAY_CHECK_RECOGNISE_RE.search(norm) is not None
    return False
|
|
20
|
+
|
|
21
|
+
# Full-line grammar: <counterparty> [separators] AVIDPAY REF[*]CK[*]<check no>*<payee>
# (whitespace inside the pattern is ignored because of re.VERBOSE).
AVIDPAY_CHECK_PARSE_RE = re.compile(
    r"""
    ^
    (?P<CTPTY_NAME>.+?)
    [\s\-]*
    AVIDPAY
    \s*
    REF\*?CK\*?(?P<CHECK_NO>\d+)\*
    (?P<PAYEE>.+?)
    $
    """,
    re.VERBOSE,
)

# Looser grammar: AVIDPAY may be followed by arbitrary text before the
# REF*CK*<check no>*<payee> tail, and nothing is required in front of it.
AVIDPAY_CHECK_CCD_PARSE_RE = re.compile(
    r"""
    AVIDPAY
    .*?
    REF\*CK\*
    (?P<CHECK_NO>\d+)
    \*
    (?P<PAYEE>.+)
    $
    """,
    re.VERBOSE,
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def parse_avidpay_check(line: str) -> dict:
    """Extract check number and payee from an AvidPay check narrative.

    The normalised line is tried against the full-line grammar first
    (reported as variant ``"LOCKBOX"``) and then against the looser
    CCD/ACH-embedded grammar (variant ``"CCD"``).

    Returns:
        A dict with ``RAW``, ``TRANS_TYPE``, ``CHECK_NO``, ``ENTITY`` (the
        payee) and ``VARIANT`` on success; otherwise a dict with ``RAW``,
        ``META`` (the normalised line) and an ``ERROR`` marker.
    """
    norm = normalize_narrative(line)

    grammars = (
        (AVIDPAY_CHECK_PARSE_RE, "LOCKBOX"),
        (AVIDPAY_CHECK_CCD_PARSE_RE, "CCD"),
    )
    for grammar, variant in grammars:
        hit = grammar.search(norm)
        if hit:
            return {
                "RAW": line,
                "TRANS_TYPE": "AVIDPAY_CHECK",
                "CHECK_NO": hit.group("CHECK_NO"),
                "ENTITY": hit.group("PAYEE").strip(),
                "VARIANT": variant,
            }

    return {
        "RAW": line,
        "META": norm,
        "ERROR": "AVIDPAY_CHECK_PARSE_FAILED",
    }
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
if __name__=='__main__':
|
|
82
|
+
pass
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from .avidp_check_parser import is_avidpay_check
|
|
3
|
+
|
|
4
|
+
def normalize_narrative(line: str) -> str:
    """Return *line* upper-cased, without leading commas or trailing commas/whitespace.

    Internal runs of whitespace are collapsed to one space.  Falsy input
    yields ``""``.
    """
    if not line:
        return ""
    without_leading = line.lstrip(",")
    without_trailing = re.sub(r"[,\s]+$", "", without_leading)
    collapsed = re.sub(r"\s+", " ", without_trailing)
    return collapsed.strip().upper()
|
|
11
|
+
|
|
12
|
+
# Standalone "AVIDPAY", whitespace, then REFCK<digits>*<non-empty payee>.
AVIDPAY_GENERIC_RECOGNISE_RE = re.compile(
    r"\bAVIDPAY\b\s+REFCK\d+\*.+$"
)


def is_avidpay_generic(line: str) -> bool:
    """Return True for AvidPay narratives that are not claimed by the check parser.

    Lines recognised by ``is_avidpay_check`` are rejected here so that they
    stay with the dedicated check grammar.
    """
    norm = normalize_narrative(line)
    if not norm or is_avidpay_check(norm):
        return False
    return AVIDPAY_GENERIC_RECOGNISE_RE.search(norm) is not None
|
|
26
|
+
|
|
27
|
+
# <counterparty> [separators] AVIDPAY REFCK<check no>*<payee>
# The separator run (spaces and dashes) before AVIDPAY is matched outside the
# COUNTERPARTY_NAME group so the captured name carries no trailing " - ".
# Whitespace inside the pattern is ignored because of re.VERBOSE.
AVIDPAY_GENERIC_PARSE_RE = re.compile(
    r"""
    ^
    (?P<COUNTERPARTY_NAME>.+?)[\s\-]*
    AVIDPAY\s+
    REFCK(?P<CHECK_NO>\d+)
    \*
    (?P<PAYEE_NAME>.+)
    $
    """,
    re.VERBOSE
)


def parse_avidpay_generic(line: str) -> dict:
    """Parse a generic AvidPay narrative.

    Args:
        line: Raw narrative text.

    Returns:
        When the normalised line does not match the grammar, a dict with only
        ``RAW`` and ``META`` (the normalised line).  Otherwise a dict with
        ``RAW``, ``FORMAT``, ``PROCESSOR``, ``PAYMENT_METHOD``, ``ENTITY``
        (counterparty name without trailing dashes/spaces), ``CHECK_NO``,
        ``PAYEE_NAME`` and ``META``.
    """
    norm = normalize_narrative(line)

    m = AVIDPAY_GENERIC_PARSE_RE.search(norm)
    if not m:
        return {
            "RAW": line,
            "META": norm
        }

    return {
        "RAW": line,
        "FORMAT": "AVIDPAY_GENERIC",
        "PROCESSOR": "AVIDPAY",
        "PAYMENT_METHOD": "CHECK",
        # A name made up solely of separators (e.g. "-") collapses to "".
        "ENTITY": m.group("COUNTERPARTY_NAME").rstrip(" -"),
        "CHECK_NO": m.group("CHECK_NO"),
        "PAYEE_NAME": m.group("PAYEE_NAME"),
        "META": norm
    }
|
|
File without changes
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def normalize_narrative(line: str) -> str:
    """Return *line* upper-cased with edge delimiters removed and whitespace collapsed.

    Leading commas/pipes and trailing backslashes/pipes/commas are dropped.
    Falsy input yields ``""``.
    """
    if not line:
        return ""
    trimmed = re.sub(r"[\\|,]+$", "", re.sub(r"^[,|]+", "", line))
    single_spaced = re.sub(r"\s+", " ", trimmed)
    return single_spaced.strip().upper()
|
|
10
|
+
|
|
11
|
+
# Keywords that mark a narrative as a direct debit / recurring payment.
# ``DIRECT\s+DEB(IT)?`` already covers "DIRECT DEBIT", so no separate
# alternative is needed for it.
DIRECT_DEBIT_RE = re.compile(
    r"""
    \b(
        DIRECT\s+DEB(IT)? |
        PAYMENT |
        PYMT |
        WITHDRAW(AL)? |
        AUTO(PAY)? |
        SUBSCRIPT(ION)? |
        MEMBERSHIP |
        RENT
    )\b
    """,
    re.VERBOSE
)


# Tokens owned by other parsers; any of them vetoes a direct-debit match.
# NOTE: under re.VERBOSE literal spaces in the pattern are ignored, so the
# "TRANSACTION REF." marker has to be spelled with an explicit \s*.  The
# trailing dot is left to the closing \b (a boundary exists between "F" and
# "." but not between "." and a following space).
DIRECT_DEBIT_EXCLUDE_RE = re.compile(
    r"""
    \b(
        ACH |
        WIRE |
        CARD |
        RDC |
        CHECK |
        PAYPAL |
        AVIDPAY |
        TRANSACTION\s*REF |
        RMR |
        VENDORPYMT
    )\b
    """,
    re.VERBOSE
)
|
|
46
|
+
|
|
47
|
+
def is_direct_debit(line: str) -> bool:
    """Return True when the normalised *line* has a direct-debit keyword and no excluded token."""
    norm = normalize_narrative(line)
    if not norm or DIRECT_DEBIT_EXCLUDE_RE.search(norm):
        return False
    return DIRECT_DEBIT_RE.search(norm) is not None
|
|
56
|
+
|
|
57
|
+
# Any run of 6+ upper-case letters, digits or dashes bounded by word edges.
REF_RE = re.compile(r"\b[A-Z0-9\-]{6,}\b")


def parse_direct_debit(line: str) -> dict:
    """Parse a direct-debit narrative.

    The counterparty is the run of whitespace-separated tokens that precedes
    the first payment keyword (``None`` when that run is empty).  Every
    ``REF_RE`` match in the normalised line is reported as a reference id.
    """
    norm = normalize_narrative(line)

    stop_words = {"DIRECT", "DEBIT", "DEB", "PYMT", "PAYMENT", "PURCHASE"}
    words = norm.split()
    cut = next(
        (pos for pos, word in enumerate(words) if word in stop_words),
        len(words),
    )
    counterparty = " ".join(words[:cut]).strip() or None

    return {
        "RAW": line,
        "TRANS_TYPE": "DIRECT_DEBIT",
        "ENTITY": counterparty,
        "REFERENCE_IDS": REF_RE.findall(norm),
    }
|
|
80
|
+
|
|
File without changes
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
# 1. Normalization (run ONCE before any recognition/parsing)
|
|
4
|
+
def normalize_narrative(line: str) -> str:
    """Return *line* upper-cased, without leading commas, with whitespace collapsed.

    Falsy input yields ``""``.
    """
    if not line:
        return ""
    single_spaced = re.sub(r"\s+", " ", line.lstrip(","))
    return single_spaced.strip().upper()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# 2. Recognition regex
|
|
14
|
+
# "DISBURSEME", a 6-digit date and one VAID-<digits> id, optionally followed
# directly by a second VAID-<digits>.
DISBURSEMENT_RECOGNISE_RE = re.compile(
    r"\bDISBURSEME\b\s+\d{6}\s+VAID-\d+(?:VAID-\d+)?"
)


def is_disbursement_narrative(line: str) -> bool:
    """Return True when the normalised *line* matches the disbursement pattern."""
    norm = normalize_narrative(line)
    if norm:
        return DISBURSEMENT_RECOGNISE_RE.search(norm) is not None
    return False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# 3. Parsing regex
|
|
28
|
+
# Collects every VAID-<digits> id in a narrative (there may be several).
VAID_ALL_RE = re.compile(r"VAID-(\d+)")

# <counterparty> DISBURSEME <6-digit date>
# VAID ids are deliberately not part of this grammar; they are gathered
# separately with VAID_ALL_RE.  (An earlier definition of this name that also
# captured a single VAID was immediately overwritten by this one and has been
# removed as dead code.)
DISBURSEMENT_PARSE_RE = re.compile(
    r"""
    ^
    (?P<CTPTY_NAME>.+?)\s+
    DISBURSEME\s+
    (?P<DATE>\d{6})
    """,
    re.VERBOSE
)
|
|
50
|
+
|
|
51
|
+
def parse_disbursement_narrative(line: str) -> dict:
    """Parse a disbursement narrative.

    Returns:
        ``{}`` when the normalised line does not match
        ``DISBURSEMENT_PARSE_RE``; otherwise a dict with ``RAW``, ``ENTITY``,
        ``TRANS_TYPE``, ``VALUE_DATE`` (the 6-digit date as written) and
        ``VAID`` (list of all VAID ids found in the line).
    """
    norm = normalize_narrative(line)

    header = DISBURSEMENT_PARSE_RE.search(norm)
    if header is None:
        return {}

    result = {"RAW": line}
    result["ENTITY"] = header.group("CTPTY_NAME")
    result["TRANS_TYPE"] = "DISBURSEMENT"
    result["VALUE_DATE"] = header.group("DATE")
    result["VAID"] = VAID_ALL_RE.findall(norm)
    return result
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
if __name__=='__main__':
|
|
71
|
+
pass
|
|
72
|
+
|
|
File without changes
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
# 1. NORMALIZATION
|
|
4
|
+
def normalize_narrative(line: str) -> str:
    """Upper-case *line* after dropping leading commas and collapsing whitespace.

    Falsy input yields ``""``.
    """
    if not line:
        return ""
    stripped_commas = line.lstrip(",")
    return re.sub(r"\s+", " ", stripped_commas).strip().upper()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# 2. RECOGNIZER
|
|
14
|
+
# A standalone FUNDS or SWEEP followed later by a standalone TO, FROM or FRMDEP.
FUNDS_TRANSFER_RECOGNISE_RE = re.compile(
    r"\b(FUNDS|SWEEP)\b.*\b(TO|FROM|FRMDEP)\b"
)

def is_funds_transfer_frmdep(line: str) -> bool:
    """Return True when the normalised *line* looks like an internal funds transfer."""
    norm = normalize_narrative(line)
    return FUNDS_TRANSFER_RECOGNISE_RE.search(norm) is not None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# 3. PARSER
|
|
24
|
+
# [REF <ref no>] FUNDS|SWEEP [TRANSFER|TRANSFR] followed by exactly one of:
#   FRMDEP <account>      -> FROM_ACCOUNT
#   FROM ... <account>    -> FROM_ACCOUNT2
#   TO ... <account>      -> TO_ACCOUNT
# An account is a digit run, optionally preceded by X/* mask characters.
# Whitespace inside the pattern is ignored because of re.VERBOSE.
FUNDS_TRANSFER_PARSE_RE = re.compile(
    r"""
    (?:REF\s+(?P<REF_NO>\w+)\s+)?
    (?:FUNDS|SWEEP)\s+
    (?:TRANSF(?:ER|R)\s+)?

    (?:
        FRMDEP\s+(?P<FROM_ACCOUNT>\d+|[X*]+\d+) |
        FROM\s+.*?(?P<FROM_ACCOUNT2>\d+|[X*]+\d+) |
        TO\s+.*?(?P<TO_ACCOUNT>\d+|[X*]+\d+)
    )
    """,
    re.VERBOSE,
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# 4. FEE NORMALIZATION
|
|
41
|
+
def normalize_fee_text(text: str) -> str:
    """Upper-case *text* and repair common fee-wording variants.

    Whitespace is collapsed, a split "MONTH LY" becomes "MONTHLY", and
    abbreviated or truncated management-fee wordings (e.g. "MGMT FE",
    "MANAG EE") become "MANAGEMENT FEE".
    """
    collapsed = re.sub(r"\s+", " ", text.upper())
    month_fixed = re.sub(r"\bMONTH\s*LY\b", "MONTHLY", collapsed)
    fee_fixed = re.sub(
        r"\b(MGMT|MGM|MANAG(E|EMENT)?)\s*(FEE|FE|EE)\b",
        "MANAGEMENT FEE",
        month_fixed,
    )
    return fee_fixed.strip()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# 5. FINAL PARSER
|
|
54
|
+
def parse_funds_transfer_frmdep(line: str) -> dict:
    """Parse an internal funds-transfer narrative.

    Returns:
        ``{}`` when ``FUNDS_TRANSFER_PARSE_RE`` does not match the normalised
        line.  Otherwise a dict with ``RAW`` and ``TRANS_TYPE`` plus, when
        captured, ``REF_NO``, ``FROM_ACCOUNT`` (with ``DIRECTION`` set to
        ``"DEBIT"``) or ``TO_ACCOUNT`` (with ``DIRECTION`` set to
        ``"CREDIT"``).
    """
    match = FUNDS_TRANSFER_PARSE_RE.search(normalize_narrative(line))
    if match is None:
        return {}

    captured = match.groupdict()
    result = {
        "RAW": line,
        "TRANS_TYPE": "INTERNAL_FUNDS_TRANSFER",
    }

    ref_no = captured["REF_NO"]
    if ref_no:
        result["REF_NO"] = ref_no

    # FRMDEP and FROM forms land in different groups; either one is a source.
    source = captured["FROM_ACCOUNT"] or captured["FROM_ACCOUNT2"]
    if source:
        result["FROM_ACCOUNT"] = source
        result["DIRECTION"] = "DEBIT"

    target = captured["TO_ACCOUNT"]
    if target:
        result["TO_ACCOUNT"] = target
        result["DIRECTION"] = "CREDIT"

    return result
|
|
File without changes
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from counterparty.key_engine.keys import KEYS, INLINE_KEYS
|
|
3
|
+
|
|
4
|
+
# Longest keys are tried first so that a long key claims its span before any
# shorter key contained in it.  De-duplicating through a set loses the input
# order, and set iteration order for strings changes between interpreter runs
# (hash randomisation), so equal-length keys are tie-broken alphabetically to
# keep the scan order reproducible.
KEYS = sorted(set(KEYS), key=lambda key: (-len(key), key))
# INLINE_KEYS is not de-duplicated; the stable sort keeps the imported order
# among keys of equal length.
INLINE_KEYS = sorted(INLINE_KEYS, key=len, reverse=True)

# Characters that may sit directly before/after a key for it to count as a
# standalone token (see is_standalone).
ALLOWED = {' ', ':', '=', ',', '/', '\\', '_', '#', '-', ';'}
|
|
8
|
+
|
|
9
|
+
def is_standalone(text, i, k_len):
    """Check that the span ``text[i:i + k_len]`` is delimited on both sides.

    The neighbouring characters must be members of ``ALLOWED``; positions
    outside *text* are treated as a space.
    """
    after_at = i + k_len
    preceding = text[i - 1] if i > 0 else ' '
    following = text[after_at] if after_at < len(text) else ' '
    if preceding not in ALLOWED:
        return False
    return following in ALLOWED
|
|
13
|
+
|
|
14
|
+
def find_keys(text, key_list):
    """Find standalone occurrences of each key of *key_list* in *text*.

    Keys are processed in list order.  A hit is kept when ``is_standalone``
    accepts it and its start offset is not inside the span of a previously
    kept hit.

    Returns:
        Dict of start offset -> key, sorted by offset.
    """
    located = {}
    taken = []

    for key in key_list:
        size = len(key)
        at = text.find(key)
        while at >= 0:
            unclaimed = not any(lo <= at < hi for lo, hi in taken)
            if unclaimed and is_standalone(text, at, size):
                located[at] = key
                taken.append((at, at + size))
            # Advance one character so overlapping occurrences are visited.
            at = text.find(key, at + 1)

    return dict(sorted(located.items()))
|
|
32
|
+
|
|
33
|
+
def split_inline_keys(text: str):
    """Split a field value on the inline keys it contains.

    Returns:
        The stripped *text* when it holds no inline key; otherwise a dict
        with the leading text under ``"value"`` and one entry per key found,
        holding the text up to the next key.
    """
    marks = find_keys(text, INLINE_KEYS)
    if not marks:
        return text.strip()

    positions = list(marks)
    size = len(text)
    out = {"value": text[:positions[0]].strip()}

    for start, stop in zip(positions, positions[1:] + [size]):
        key = marks[start]
        # Skip separator characters between the key and its value.
        z = start + len(key)
        while z < size and not text[z].isalnum():
            z += 1
        out[key] = text[z:stop].strip()

    return out
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def all_parser_v2(v1_output: dict):
    """Second pass: apply ``split_inline_keys`` to every string value.

    Values of any other type are passed through untouched; key order is kept.
    """
    return {
        name: (split_inline_keys(value) if isinstance(value, str) else value)
        for name, value in v1_output.items()
    }
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def all_parser_v1(narr: str):
    """First pass: cut the narrative into ``{key: value}`` segments on ``KEYS``.

    The text before the first key (the whole narrative when no key is found)
    goes under ``"META"``; each key maps to the text that follows it up to
    the next key, with leading separator characters skipped.
    """
    marks = find_keys(narr, KEYS)
    positions = list(marks)
    size = len(narr)

    head_end = positions[0] if positions else size
    out = {"META": narr[:head_end].strip()}

    for start, stop in zip(positions, positions[1:] + [size]):
        key = marks[start]
        z = start + len(key)
        while z < size and not narr[z].isalnum():
            z += 1
        out[key] = narr[z:stop].strip()

    return out
|
|
84
|
+
|
|
85
|
+
def all_parser(narr: str):
    """Run both key-splitting passes on *narr* and return the result with the raw text under ``"RAW"``."""
    result = {"RAW": narr}
    result.update(all_parser_v2(all_parser_v1(narr)))
    return result
|
|
File without changes
|