counterparty-0.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- counterparty/__init__.py +29 -0
- counterparty/extraction/__init__.py +0 -0
- counterparty/extraction/clean.py +124 -0
- counterparty/extraction/extract_payer_payee.py +160 -0
- counterparty/extraction/infer_counterparty.py +17 -0
- counterparty/key_engine/__init__.py +0 -0
- counterparty/key_engine/canonical_keys.json +326 -0
- counterparty/key_engine/key_detector.py +335 -0
- counterparty/key_engine/keys.py +332 -0
- counterparty/parsers/LAT_AM/LAT_AM_Entry.py +91 -0
- counterparty/parsers/LAT_AM/__init__.py +0 -0
- counterparty/parsers/LAT_AM/pattern1.py +169 -0
- counterparty/parsers/LAT_AM/pattern10.py +76 -0
- counterparty/parsers/LAT_AM/pattern11.py +76 -0
- counterparty/parsers/LAT_AM/pattern12.py +99 -0
- counterparty/parsers/LAT_AM/pattern2.py +102 -0
- counterparty/parsers/LAT_AM/pattern3.py +75 -0
- counterparty/parsers/LAT_AM/pattern4.py +128 -0
- counterparty/parsers/LAT_AM/pattern5.py +54 -0
- counterparty/parsers/LAT_AM/pattern6.py +141 -0
- counterparty/parsers/LAT_AM/pattern7.py +116 -0
- counterparty/parsers/LAT_AM/pattern8.py +134 -0
- counterparty/parsers/LAT_AM/pattern9.py +86 -0
- counterparty/parsers/__init__.py +0 -0
- counterparty/parsers/ach/__init__.py +0 -0
- counterparty/parsers/ach/ach_parser.py +190 -0
- counterparty/parsers/avidpay/__init__.py +0 -0
- counterparty/parsers/avidpay/avidp_check_parser.py +82 -0
- counterparty/parsers/avidpay/avidp_gen_parser.py +59 -0
- counterparty/parsers/directdebit/__init__.py +0 -0
- counterparty/parsers/directdebit/directdeb.py +80 -0
- counterparty/parsers/disbursement/__init__.py +0 -0
- counterparty/parsers/disbursement/disb_parser.py +72 -0
- counterparty/parsers/fundsTransfer/__init__.py +0 -0
- counterparty/parsers/fundsTransfer/fundsTrans_parser.py +80 -0
- counterparty/parsers/generic/__init__.py +0 -0
- counterparty/parsers/generic/all_parser.py +91 -0
- counterparty/parsers/merchref/__init__.py +0 -0
- counterparty/parsers/merchref/merch_ref_parser.py +47 -0
- counterparty/parsers/misc/__init__.py +0 -0
- counterparty/parsers/misc/cardp.py +61 -0
- counterparty/parsers/misc/invo.py +78 -0
- counterparty/parsers/misc/webt.py +55 -0
- counterparty/parsers/paypal/__init__.py +0 -0
- counterparty/parsers/paypal/paypal.py +118 -0
- counterparty/parsers/processor_eft/__init__.py +0 -0
- counterparty/parsers/processor_eft/peft.py +110 -0
- counterparty/parsers/remittance/__init__.py +0 -0
- counterparty/parsers/remittance/remi.py +79 -0
- counterparty/parsers/swift/__init__.py +0 -0
- counterparty/parsers/swift/swift_parser.py +97 -0
- counterparty/parsers/vendorpay/__init__.py +0 -0
- counterparty/parsers/vendorpay/vp_parser.py +54 -0
- counterparty/parsers/vendorpymt/__init__.py +0 -0
- counterparty/parsers/vendorpymt/vpymt_parser.py +132 -0
- counterparty/parsers/wire/__init__.py +0 -0
- counterparty/parsers/wire/wire_parser.py +137 -0
- counterparty/route.py +116 -0
- counterparty/routines.py +72 -0
- counterparty/util.py +40 -0
- counterparty-0.1.6.dist-info/METADATA +9 -0
- counterparty-0.1.6.dist-info/RECORD +64 -0
- counterparty-0.1.6.dist-info/WHEEL +5 -0
- counterparty-0.1.6.dist-info/top_level.txt +1 -0
counterparty/parsers/LAT_AM/LAT_AM_Entry.py
@@ -0,0 +1,91 @@

from counterparty.parsers.LAT_AM.pattern1 import is_pattern1, parse_pattern1
from counterparty.parsers.LAT_AM.pattern2 import is_pattern2, parse_pattern2
from counterparty.parsers.LAT_AM.pattern3 import is_pattern3, parse_pattern3
from counterparty.parsers.LAT_AM.pattern4 import is_pattern4, parse_pattern4
from counterparty.parsers.LAT_AM.pattern5 import is_pattern5, parse_pattern5
from counterparty.parsers.LAT_AM.pattern6 import is_pattern6, parse_pattern6
from counterparty.parsers.LAT_AM.pattern7 import is_pattern7, parse_pattern7
from counterparty.parsers.LAT_AM.pattern8 import is_pattern8, parse_pattern8
from counterparty.parsers.LAT_AM.pattern9 import is_pattern9, parse_pattern9
from counterparty.parsers.LAT_AM.pattern10 import is_pattern10, parse_pattern10
from counterparty.parsers.LAT_AM.pattern11 import is_pattern11, parse_pattern11
from counterparty.parsers.LAT_AM.pattern12 import is_pattern12, parse_pattern12


def is_LATAM(line: str) -> int | None:
    if not line:
        return None

    line = line.strip()
    if not line:
        return None

    if is_pattern1(line):
        return 1
    if is_pattern2(line):
        return 2
    if is_pattern3(line):
        return 3
    if is_pattern4(line):
        return 4
    if is_pattern5(line):
        return 5
    if is_pattern6(line):
        return 6
    if is_pattern7(line):
        return 7
    if is_pattern8(line):
        return 8
    if is_pattern9(line):
        return 9
    if is_pattern10(line):
        return 10
    if is_pattern11(line):
        return 11
    if is_pattern12(line):
        return 12

    return None


def LATAM_parse(line: str) -> dict | None:

    pat_no = is_LATAM(line)
    if pat_no is None:
        return None

    print(f"LATIN AMERICAN Pattern-{pat_no}\n")

    res = None
    if pat_no == 1:
        res = parse_pattern1(line)
    elif pat_no == 2:
        res = parse_pattern2(line)
    elif pat_no == 3:
        res = parse_pattern3(line)
    elif pat_no == 4:
        res = parse_pattern4(line)
    elif pat_no == 5:
        res = parse_pattern5(line)
    elif pat_no == 6:
        res = parse_pattern6(line)
    elif pat_no == 7:
        res = parse_pattern7(line)
    elif pat_no == 8:
        res = parse_pattern8(line)
    elif pat_no == 9:
        res = parse_pattern9(line)
    elif pat_no == 10:
        res = parse_pattern10(line)
    elif pat_no == 11:
        res = parse_pattern11(line)
    elif pat_no == 12:
        res = parse_pattern12(line)

    if isinstance(res, dict):
        # Ensure RAW is first key
        return {
            "RAW": line,
            **res
        }

    return res
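
For orientation only, not part of the packaged code: a minimal sketch of how the dispatcher above might be called. The narrative string is a synthetic placeholder; which pattern number it routes to, if any, depends on the individual is_patternN checks.

from counterparty.parsers.LAT_AM.LAT_AM_Entry import is_LATAM, LATAM_parse

narrative = "SPEI ENVIADO BANCO HIPOTETICO 0001234567 PROVEEDOR EJEMPLO SA"  # synthetic sample

pattern_no = is_LATAM(narrative)   # first matching pattern number, or None
parsed = LATAM_parse(narrative)    # dict with "RAW" as the first key, or None if nothing matches
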
counterparty/parsers/LAT_AM/__init__.py
File without changes
counterparty/parsers/LAT_AM/pattern1.py
@@ -0,0 +1,169 @@

import re
from typing import Dict, Optional


def normalize_narrative(line: str) -> str:
    if not line:
        return ""
    line = line.upper()
    line = re.sub(r"^[,|\\]+", "", line)
    line = re.sub(r"[\\|,]+$", "", line)
    line = re.sub(r"\s+", " ", line)
    return line.strip()


def is_pattern1(line: str) -> bool:
    return normalize_narrative(line).startswith("/PT/")


def is_pattern1a(line: str) -> bool:
    txt = normalize_narrative(line)
    return bool(re.match(r"^/PT/DE/EI/", txt))


def is_pattern1b(line: str) -> bool:
    txt = normalize_narrative(line)

    if not txt.startswith("/PT/"):
        return False

    parts = [p for p in txt.split("/") if p]
    if len(parts) < 6:
        return False

    kv = 0
    i = 1
    while i + 1 < len(parts):
        k = parts[i]
        v = parts[i + 1]
        if 1 <= len(k) <= 4 and v:
            kv += 1
            i += 2
        else:
            i += 1

    return kv >= 3


def is_pattern1c(line: str) -> bool:
    txt = normalize_narrative(line)
    if not txt.startswith("/PT/"):
        return False
    return True


def detect_pattern1_variant(line: str) -> str:
    if is_pattern1a(line):
        return "1A"
    if is_pattern1b(line):
        return "1B"
    if is_pattern1c(line):
        return "1C"
    return "UNKNOWN"


pattern1a_full = re.compile(
    r"^/([A-Z/]+)\s+REF\.?\s+(\d+)\s+A\sF/V\s+(.+?)\s+([A-Z0-9]{2,5})\s+([0-9]{2})(?:/([A-Z0-9]{2,10})/([0-9]{1,5})/([A-Z0-9]+))?$",
    re.IGNORECASE
)

pattern1a_short = re.compile(
    r"^/([A-Z/]+)([0-9]+)?(?:[-\s]+(.+))?$",
    re.IGNORECASE
)


def parse_pattern1a(line: str) -> Optional[Dict]:
    txt = normalize_narrative(line)

    m = pattern1a_full.match(txt)
    if m:
        (
            flags_raw,
            reference_id,
            beneficiary,
            internal_code,
            seq_no,
            tag,
            code,
            action
        ) = m.groups()

        return {
            "variant": "1A",
            "transaction_type": "PAYMENT",
            "flags": [f for f in flags_raw.split("/") if f],
            "reference_id": reference_id,
            "beneficiary": beneficiary.strip(),
            "internal_code": internal_code,
            "sequence_number": seq_no,
            "control_tag": tag,
            "control_code": code,
            "transaction_action": action,
        }

    m = pattern1a_short.match(txt)
    if m:
        flags_raw, internal_code, detail = m.groups()
        return {
            "variant": "1A",
            "transaction_type": "PAYMENT",
            "flags": [f for f in flags_raw.split("/") if f],
            "internal_code": internal_code,
            "detail": detail,
        }

    return None


def parse_pattern1b(line: str) -> Dict:
    txt = normalize_narrative(line)
    parts = [p for p in txt.split("/") if p]

    fields = {}
    i = 1
    while i + 1 < len(parts):
        k = parts[i]
        v = parts[i + 1]
        if 1 <= len(k) <= 4:
            if k not in fields:
                fields[k] = v.strip()
            i += 2
        else:
            i += 1

    return {
        "variant": "1B",
        "transaction_type": "SETTLEMENT",
        "flags": [parts[0]],
        **fields
    }


def parse_pattern1c(line: str) -> Dict:
    txt = normalize_narrative(line)
    parts = [p for p in txt.split("/") if p]

    return {
        "variant": "1C",
        "transaction_type": "PATTERN1_PARTIAL",
        "tokens": parts
    }


def parse_pattern1(line: str) -> Optional[Dict]:
    if not is_pattern1(line):
        return None

    v = detect_pattern1_variant(line)

    if v == "1A":
        return parse_pattern1a(line)

    if v == "1B":
        return parse_pattern1b(line)

    if v == "1C":
        return parse_pattern1c(line)

    return {}
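
A hedged usage sketch for the variant dispatch above; the "/PT/" narrative below is invented to fit the 1A shape (a REF. number followed by an "A F/V" beneficiary clause) and is not taken from real data.

from counterparty.parsers.LAT_AM.pattern1 import parse_pattern1

sample = "/PT/DE/EI/ REF. 12345 A F/V PROVEEDOR EJEMPLO SA ABC 01"  # synthetic
result = parse_pattern1(sample)
# Expected shape (roughly): variant "1A", reference_id "12345",
# beneficiary "PROVEEDOR EJEMPLO SA", internal_code "ABC", sequence_number "01"
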
counterparty/parsers/LAT_AM/pattern10.py
@@ -0,0 +1,76 @@

import re
import json


def normalize_narrative(line: str) -> str:
    if not line:
        return ""
    line = line.upper()
    line = re.sub(r"[\\|,]+", " ", line)
    line = re.sub(r"\s+", " ", line)
    return line.strip()


pattern10 = re.compile(
    r"/IDCODE/",
    re.IGNORECASE
)


def is_pattern10(line: str) -> bool:
    if not line:
        return False
    txt = normalize_narrative(line)
    return bool(pattern10.search(txt))


def extract_payer_id(txt: str) -> str | None:
    m = re.search(r"/IDCODE/([A-Z0-9]+)/", txt)
    return m.group(1) if m else None


def extract_collection_code(txt: str) -> str | None:
    m = re.search(r"/COLLEC/([A-Z0-9]+)/", txt)
    return m.group(1) if m else None


def extract_payer_name(txt: str) -> str | None:
    m = re.search(r"/PAYER/([^/]+)/", txt)
    if m:
        return m.group(1).strip()
    return None


def extract_info(txt: str) -> str | None:
    m = re.search(r"/INFO/(.+)$", txt)
    if m:
        return m.group(1).strip()
    return None


def parse_pattern10(line: str) -> dict | None:
    if not line:
        return None

    txt = normalize_narrative(line)

    if not is_pattern10(txt):
        return None

    payer_id = extract_payer_id(txt)
    coll = extract_collection_code(txt)
    name = extract_payer_name(txt)
    info = extract_info(txt)

    return {
        "transaction_type": "ACH_COLLECTION",
        "direction": "INCOMING",
        "payer_name": name,
        "payer_id": payer_id,
        "collection_code": coll,
        "info_text": info
    }


if __name__ == "__main__":
    pass
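
A small, hedged example of driving this parser directly; the /IDCODE/ narrative is fabricated to fit the regexes above.

from counterparty.parsers.LAT_AM.pattern10 import parse_pattern10

sample = "/IDCODE/AB123/COLLEC/C001/PAYER/EMPRESA EJEMPLO SA/INFO/PAGO DE SERVICIOS"  # synthetic
print(parse_pattern10(sample))
# Expected (roughly): payer_id "AB123", collection_code "C001",
# payer_name "EMPRESA EJEMPLO SA", info_text "PAGO DE SERVICIOS"
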
counterparty/parsers/LAT_AM/pattern11.py
@@ -0,0 +1,76 @@

import re
import json


def normalize_narrative(line: str) -> str:
    if not line:
        return ""
    line = line.upper()
    line = re.sub(r"^[,|\\]+", "", line)
    line = re.sub(r"[\\|,]+$", "", line)
    line = re.sub(r"\s+", " ", line)
    return line.strip()


card_keywords = [
    "DINERS CLUB",
    "DINERS",
    "VISA",
    "VISANET",
    "MASTERCARD",
    "AMEX",
    "AMERICAN EXPRESS",
    "DISCOVER",
    "REDCARD",
    "PROSA"
]


pattern11 = re.compile(
    "|".join(card_keywords),
    re.IGNORECASE
)


def is_pattern11(line: str) -> bool:
    if not line:
        return False
    txt = normalize_narrative(line)
    return bool(pattern11.search(txt))


def extract_posting_code(txt: str) -> str | None:
    # leading alphanumeric block e.g. 2401DE
    m = re.match(r"([A-Z0-9]+)", txt)
    return m.group(1) if m else None


def extract_network(txt: str) -> str | None:
    for w in card_keywords:
        if w in txt:
            return w
    return None


def parse_pattern11(line: str) -> dict | None:
    if not line:
        return None

    txt = normalize_narrative(line)

    if not is_pattern11(txt):
        return None

    posting_code = extract_posting_code(txt)
    network = extract_network(txt)

    return {
        "transaction_type": "CARD_PAYMENT",
        "direction": "OUTGOING",
        "card_network": network,
        "posting_code": posting_code
    }


if __name__ == "__main__":
    pass
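
Again only a sketch; the card narrative below is made up to exercise the keyword match and the leading posting-code block.

from counterparty.parsers.LAT_AM.pattern11 import parse_pattern11

sample = "2401DE PAGO MASTERCARD COMERCIO EJEMPLO"  # synthetic
print(parse_pattern11(sample))
# Expected (roughly): card_network "MASTERCARD", posting_code "2401DE",
# transaction_type "CARD_PAYMENT", direction "OUTGOING"
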
counterparty/parsers/LAT_AM/pattern12.py
@@ -0,0 +1,99 @@

import re
import json


def normalize_narrative(line: str) -> str:
    if not line:
        return ""
    line = line.upper()
    line = re.sub(r"[\\|,]+", " ", line)
    line = re.sub(r"\s+", " ", line)
    return line.strip()


pattern12 = re.compile(r"\bSPEI\b", re.IGNORECASE)


def is_pattern12(line: str) -> bool:
    if not line:
        return False
    txt = normalize_narrative(line)
    return "SPEI" in txt


def infer_direction(txt: str) -> str | None:
    if any(x in txt for x in ["ENVIADO", "SALIENTE"]):
        return "OUTGOING"
    if any(x in txt for x in ["RECIBIDO", "ABONO", "INGRESO"]):
        return "INCOMING"
    if any(x in txt for x in ["DEVUELTO", "RECHAZADO", "REVERSO"]):
        return "RETURNED"
    return None


def extract_reference(txt: str) -> str | None:
    nums = re.findall(r"\b([0-9]{10,})\b", txt)
    if not nums:
        return None
    return max(nums, key=len)


def extract_originating_bank(txt: str) -> str | None:
    m = re.search(
        r"SPEI\s+(ENVIADO|SALIENTE|RECIBIDO|ABONO|INGRESO|DEVUELTO|RECHAZADO|REVERSO)\s+(.+?)\s+[0-9]",
        txt
    )
    if m:
        return m.group(2).strip()
    return None


def extract_counterparty(txt: str, reference: str | None) -> str | None:
    if not reference:
        return None

    m = re.search(reference + r"\s+(.+)$", txt)
    if not m:
        return None

    tail = m.group(1).strip()
    tokens = tail.split()

    ctpty = []
    for t in tokens:
        # purely numeric long token → stop
        if re.match(r"^[0-9]{6,}$", t):
            break
        # extremely long alphanumeric code → stop
        if re.match(r"^[A-Z0-9]{12,}$", t):
            break
        ctpty.append(t)

    return " ".join(ctpty) if ctpty else None


def parse_pattern12(line: str) -> dict | None:
    if not line:
        return None

    txt = normalize_narrative(line)

    if not is_pattern12(txt):
        return None

    direction = infer_direction(txt)
    reference = extract_reference(txt)
    bank = extract_originating_bank(txt)
    counterparty = extract_counterparty(txt, reference)

    return {
        "transaction_type": "SPEI_TRANSFER",
        "direction": direction,
        "originating_bank": bank,
        "ENTITY": counterparty,
        "reference_id": reference
    }


if __name__ == "__main__":
    pass
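
A hedged SPEI example; the bank and counterparty names are placeholders, chosen so the reference (ten or more digits) and the direction keyword land where the helpers above expect them.

from counterparty.parsers.LAT_AM.pattern12 import parse_pattern12

sample = "SPEI ENVIADO BANCO HIPOTETICO 0001234567 PROVEEDOR EJEMPLO SA DE CV"  # synthetic
print(parse_pattern12(sample))
# Expected (roughly): direction "OUTGOING", originating_bank "BANCO HIPOTETICO",
# reference_id "0001234567", ENTITY "PROVEEDOR EJEMPLO SA DE CV"
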
counterparty/parsers/LAT_AM/pattern2.py
@@ -0,0 +1,102 @@

import re
import json

def normalize_narrative(line: str) -> str:
    if not line:
        return ""
    line = line.upper()
    line = re.sub(r"^[,|\\]+", "", line)
    line = re.sub(r"[\\|,]+$", "", line)
    line = re.sub(r"\s+", " ", line)
    return line.strip()

pattern2 = re.compile(
    r"^/[A-Z]{3}/\d{4}-PAGO TRANSFERENCIA",
    re.IGNORECASE
)

def is_pattern2(line: str) -> bool:
    if not line:
        return False
    line_n = normalize_narrative(line)
    return bool(pattern2.search(line_n))

pattern2_parse = re.compile(
    r"""
    ^
    /([A-Z]{3})                         # ledger class
    /([0-9]{4})-PAGO\sTRANSFERENCIA\s
    ([A-Z]+)                            # clearing system (SPEI)
    /([A-Z]+)                           # channel marker (HTC)
    /([A-Z]+)                           # operation type (TRF)
    /([A-Z]+)                           # account flag (AIN)
    /(.+)$                              # data block
    """,
    re.IGNORECASE | re.VERBOSE
)

RESERVED_TOKENS = {"RFC", "IVA", "REF", "TAX", "NUM", "ID"}

def extract_counterparty(raw: str) -> str | None:
    tokens = raw.split()
    clean = []
    for t in tokens:
        if t in RESERVED_TOKENS:
            break
        clean.append(t)
    if not clean:
        return None
    return " ".join(clean).strip()

def extract_rfc(raw: str) -> str | None:
    match = re.search(r"RFC\s+([A-Z0-9]+)", raw)
    return match.group(1) if match else None

def extract_iva(raw: str) -> str | None:
    match = re.search(r"IVA\s+([0-9]+)", raw)
    return match.group(1) if match else None

def parse_pattern2(line: str) -> dict | None:
    if not line:
        return None

    line_n = normalize_narrative(line)
    match = pattern2_parse.match(line_n)
    if not match:
        return None

    (
        ledger_class,
        txn_code,
        clearing_system,
        channel_marker,
        op_code,
        acct_flag,
        raw
    ) = match.groups()

    result = {
        "ledger_class": ledger_class,
        "transaction_code": txn_code,
        "transaction_category": "PAYMENT",
        "transaction_type": "ACCOUNT_TRANSFER",
        "operation_type": "TRANSFER",
        "clearing_system": clearing_system,
        "is_interbank": True,
        "account_based": (acct_flag == "AIN"),
        "channel_marker": channel_marker,
        "ENTITY": extract_counterparty(raw)
    }

    rfc = extract_rfc(raw)
    if rfc:
        result["ENTITY_TAX_ID"] = rfc

    iva = extract_iva(raw)
    if iva:
        result["tax_rate"] = iva

    return result

if __name__ == "__main__":
    pass
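
A sketch of the /XXX/NNNN-PAGO TRANSFERENCIA shape; the ledger class, RFC, and names below are invented to satisfy the verbose regex.

from counterparty.parsers.LAT_AM.pattern2 import parse_pattern2

sample = "/MXN/0042-PAGO TRANSFERENCIA SPEI/HTC/TRF/AIN/EMPRESA EJEMPLO SA DE CV RFC EEJ010101AB1 IVA 16"  # synthetic
print(parse_pattern2(sample))
# Expected (roughly): ledger_class "MXN", transaction_code "0042", clearing_system "SPEI",
# ENTITY "EMPRESA EJEMPLO SA DE CV", ENTITY_TAX_ID "EEJ010101AB1", tax_rate "16"
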
counterparty/parsers/LAT_AM/pattern3.py
@@ -0,0 +1,75 @@

import re
import json


def normalize_narrative(line: str) -> str:
    if not line:
        return ""
    line = line.upper()
    line = re.sub(r"^[,|\\]+", "", line)
    line = re.sub(r"[\\|,]+$", "", line)
    line = re.sub(r"\s+", " ", line)
    return line.strip()


pattern3 = re.compile(
    r"^/?(MERCHANT PAYMENT|TRANSACTION REVERSAL)\s*RRN",
    re.IGNORECASE
)


def is_pattern3(line: str) -> bool:
    if not line:
        return False
    return bool(pattern3.search(normalize_narrative(line)))


pattern3_parse = re.compile(
    r"""
    ^/?(MERCHANT\ PAYMENT|TRANSACTION\ REVERSAL)\s*RRN\   # escaped literal space after RRN
    ([0-9]+)          # rrn
    \s+
    ([0-9]+)          # merchant id
    \s+
    ([0-9]{6})$       # ddmmyy
    """,
    re.IGNORECASE | re.VERBOSE
)


def parse_pattern3(line: str) -> dict | None:
    if not line:
        return None

    line_n = normalize_narrative(line)
    match = pattern3_parse.match(line_n)
    if not match:
        return None

    prefix, rrn, merchant_id, ddmmyy = match.groups()

    # convert DDMMYY => YYYY-MM-DD
    day = ddmmyy[0:2]
    month = ddmmyy[2:4]
    year = "20" + ddmmyy[4:6]
    date_fmt = f"{year}-{month}-{day}"

    # dynamic classification:
    if prefix == "TRANSACTION REVERSAL":
        category = "MERCHANT_REVERSAL"
        direction = "INCOMING"
    else:
        category = "MERCHANT_PAYMENT"
        direction = "OUTGOING"

    return {
        "transaction_category": category,
        "direction": direction,
        "rrn": rrn,
        "merchant_id": merchant_id,
        "processing_date": date_fmt
    }


if __name__ == "__main__":
    pass
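
Last, a hedged RRN example; the retrieval reference number, merchant id, and DDMMYY date are placeholders.

from counterparty.parsers.LAT_AM.pattern3 import parse_pattern3

sample = "MERCHANT PAYMENT RRN 000123456789 987654321 150324"  # synthetic
print(parse_pattern3(sample))
# Expected (roughly): transaction_category "MERCHANT_PAYMENT", direction "OUTGOING",
# rrn "000123456789", merchant_id "987654321", processing_date "2024-03-15"
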