counterparty 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. counterparty-0.1.6/PKG-INFO +9 -0
  2. counterparty-0.1.6/README.md +70 -0
  3. counterparty-0.1.6/pyproject.toml +3 -0
  4. counterparty-0.1.6/setup.cfg +4 -0
  5. counterparty-0.1.6/setup.py +16 -0
  6. counterparty-0.1.6/src/counterparty/__init__.py +29 -0
  7. counterparty-0.1.6/src/counterparty/extraction/__init__.py +0 -0
  8. counterparty-0.1.6/src/counterparty/extraction/clean.py +124 -0
  9. counterparty-0.1.6/src/counterparty/extraction/extract_payer_payee.py +160 -0
  10. counterparty-0.1.6/src/counterparty/extraction/infer_counterparty.py +17 -0
  11. counterparty-0.1.6/src/counterparty/key_engine/__init__.py +0 -0
  12. counterparty-0.1.6/src/counterparty/key_engine/canonical_keys.json +326 -0
  13. counterparty-0.1.6/src/counterparty/key_engine/key_detector.py +335 -0
  14. counterparty-0.1.6/src/counterparty/key_engine/keys.py +332 -0
  15. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/LAT_AM_Entry.py +91 -0
  16. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/__init__.py +0 -0
  17. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern1.py +169 -0
  18. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern10.py +76 -0
  19. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern11.py +76 -0
  20. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern12.py +99 -0
  21. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern2.py +102 -0
  22. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern3.py +75 -0
  23. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern4.py +128 -0
  24. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern5.py +54 -0
  25. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern6.py +141 -0
  26. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern7.py +116 -0
  27. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern8.py +134 -0
  28. counterparty-0.1.6/src/counterparty/parsers/LAT_AM/pattern9.py +86 -0
  29. counterparty-0.1.6/src/counterparty/parsers/__init__.py +0 -0
  30. counterparty-0.1.6/src/counterparty/parsers/ach/__init__.py +0 -0
  31. counterparty-0.1.6/src/counterparty/parsers/ach/ach_parser.py +190 -0
  32. counterparty-0.1.6/src/counterparty/parsers/avidpay/__init__.py +0 -0
  33. counterparty-0.1.6/src/counterparty/parsers/avidpay/avidp_check_parser.py +82 -0
  34. counterparty-0.1.6/src/counterparty/parsers/avidpay/avidp_gen_parser.py +59 -0
  35. counterparty-0.1.6/src/counterparty/parsers/directdebit/__init__.py +0 -0
  36. counterparty-0.1.6/src/counterparty/parsers/directdebit/directdeb.py +80 -0
  37. counterparty-0.1.6/src/counterparty/parsers/disbursement/__init__.py +0 -0
  38. counterparty-0.1.6/src/counterparty/parsers/disbursement/disb_parser.py +72 -0
  39. counterparty-0.1.6/src/counterparty/parsers/fundsTransfer/__init__.py +0 -0
  40. counterparty-0.1.6/src/counterparty/parsers/fundsTransfer/fundsTrans_parser.py +80 -0
  41. counterparty-0.1.6/src/counterparty/parsers/generic/__init__.py +0 -0
  42. counterparty-0.1.6/src/counterparty/parsers/generic/all_parser.py +91 -0
  43. counterparty-0.1.6/src/counterparty/parsers/merchref/__init__.py +0 -0
  44. counterparty-0.1.6/src/counterparty/parsers/merchref/merch_ref_parser.py +47 -0
  45. counterparty-0.1.6/src/counterparty/parsers/misc/__init__.py +0 -0
  46. counterparty-0.1.6/src/counterparty/parsers/misc/cardp.py +61 -0
  47. counterparty-0.1.6/src/counterparty/parsers/misc/invo.py +78 -0
  48. counterparty-0.1.6/src/counterparty/parsers/misc/webt.py +55 -0
  49. counterparty-0.1.6/src/counterparty/parsers/paypal/__init__.py +0 -0
  50. counterparty-0.1.6/src/counterparty/parsers/paypal/paypal.py +118 -0
  51. counterparty-0.1.6/src/counterparty/parsers/processor_eft/__init__.py +0 -0
  52. counterparty-0.1.6/src/counterparty/parsers/processor_eft/peft.py +110 -0
  53. counterparty-0.1.6/src/counterparty/parsers/remittance/__init__.py +0 -0
  54. counterparty-0.1.6/src/counterparty/parsers/remittance/remi.py +79 -0
  55. counterparty-0.1.6/src/counterparty/parsers/swift/__init__.py +0 -0
  56. counterparty-0.1.6/src/counterparty/parsers/swift/swift_parser.py +97 -0
  57. counterparty-0.1.6/src/counterparty/parsers/vendorpay/__init__.py +0 -0
  58. counterparty-0.1.6/src/counterparty/parsers/vendorpay/vp_parser.py +54 -0
  59. counterparty-0.1.6/src/counterparty/parsers/vendorpymt/__init__.py +0 -0
  60. counterparty-0.1.6/src/counterparty/parsers/vendorpymt/vpymt_parser.py +132 -0
  61. counterparty-0.1.6/src/counterparty/parsers/wire/__init__.py +0 -0
  62. counterparty-0.1.6/src/counterparty/parsers/wire/wire_parser.py +137 -0
  63. counterparty-0.1.6/src/counterparty/route.py +116 -0
  64. counterparty-0.1.6/src/counterparty/routines.py +72 -0
  65. counterparty-0.1.6/src/counterparty/util.py +40 -0
  66. counterparty-0.1.6/src/counterparty.egg-info/PKG-INFO +9 -0
  67. counterparty-0.1.6/src/counterparty.egg-info/SOURCES.txt +68 -0
  68. counterparty-0.1.6/src/counterparty.egg-info/dependency_links.txt +1 -0
  69. counterparty-0.1.6/src/counterparty.egg-info/requires.txt +1 -0
  70. counterparty-0.1.6/src/counterparty.egg-info/top_level.txt +1 -0
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: counterparty
3
+ Version: 0.1.6
4
+ Summary: A library for counterparty extraction and narrative parsing.
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: rapidfuzz>=2.0.0
7
+ Dynamic: requires-dist
8
+ Dynamic: requires-python
9
+ Dynamic: summary
@@ -0,0 +1,70 @@
1
+ # Counterparty Extraction Library
2
+
3
+ A Python library designed to parse financial transaction narratives and extract key counterparty information (Payer, Payee, and Counterparty Name).
4
+
5
+ ## Features
6
+
7
+ * **Narrative Parsing**: Automatically detects transaction types (Wire, ACH, Check, etc.).
8
+ * **Entity Extraction**: Identifies the 'Payer' and 'Payee' from complex strings.
9
+ * **Clean Output**: Returns a structured JSON response separating parsing metadata from extraction results.
10
+
11
+ ## Installation
12
+
13
+ You can install this package locally using `pip`.
14
+
15
+ ### For Development (Editable Mode)
16
+ Recommended if you plan to modify the code.
17
+ ```bash
18
+ git clone <repository-url>
19
+ cd counterparty
20
+ pip install -e .
21
+ ```
22
+
23
+ ### Standard Install
24
+ ```bash
25
+ cd counterparty
26
+ pip install .
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ The package provides a simple entry point `get_counterparty`.
32
+
33
+ ```python
34
+ from counterparty import get_counterparty
35
+
36
+ # 1. Define your transaction details
37
+ narrative = "WIRE TRANSFER. Orig : GOOGLE INC"
38
+ amount = 100.00 # Optional, but helps infer direction (Credit vs Debit)
39
+
40
+ # 2. Extract info
41
+ result = get_counterparty(narrative, amount=amount)
42
+
43
+ # 3. Use the result
44
+ print(result)
45
+ ```
46
+
47
+ ## Output Structure
48
+
49
+ The output is a nested dictionary with two main sections:
50
+
51
+ * `parsed`: Contains the raw parsing details and metadata (e.g., transaction type).
52
+ * `ctpty`: Contains the extracted entity information.
53
+
54
+ **Example Output:**
55
+ ```json
56
+ {
57
+ "parsed": {
58
+ "RAW": "WIRE TRANSFER. Orig: GOOGLE INC",
59
+ "ORIG": "GOOGLE INC",
60
+ "META": "WIRE TRANSFER",
61
+ "parser_type": "wire"
62
+ },
63
+ "ctpty": {
64
+ "payer": "GOOGLE INC",
65
+ "payee": null,
66
+ "counterparty": "GOOGLE INC",
67
+ "amount": 100.0
68
+ }
69
+ }
70
+ ```
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Distribution metadata for the ``counterparty`` package.
# Sources live under ``src/`` (src layout), so package discovery and the
# root package directory both point at that folder.
SRC_DIR = "src"

# Non-Python files shipped inside the package (the key_engine JSON data).
PACKAGE_DATA = {
    "counterparty": ["key_engine/*.json"],
}

# Runtime dependencies.
INSTALL_REQUIRES = [
    "rapidfuzz>=2.0.0",
]

setup(
    name="counterparty",
    version="0.1.6",
    description="A library for counterparty extraction and narrative parsing.",
    package_dir={"": SRC_DIR},
    packages=find_packages(where=SRC_DIR),
    package_data=PACKAGE_DATA,
    python_requires=">=3.9",
    install_requires=INSTALL_REQUIRES,
)
@@ -0,0 +1,29 @@
1
+ from counterparty.route import route_to_parser
2
+ from counterparty.extraction.extract_payer_payee import extract_payor_payee
3
+ from counterparty.util import normalize_spaces
4
+
5
def get_counterparty(narrative: str, amount: float = None) -> dict:
    """Parse a transaction narrative and extract payer/payee/counterparty.

    Args:
        narrative: Free-text transaction narrative.
        amount: Optional transaction amount; it is forwarded to the
            extraction step and echoed back in the result.

    Returns:
        A dict with two keys:

        * ``"parsed"``: the dict returned by ``route_to_parser`` with an
          extra ``"parser_type"`` entry (empty dict when *narrative* is
          falsy).
        * ``"ctpty"``: the dict returned by ``extract_payor_payee`` (all
          parties ``None`` when *narrative* is falsy).
    """
    # Nothing to parse: return the empty skeleton without calling any parser.
    if not narrative:
        empty_ctpty = {
            "payer": None,
            "payee": None,
            "amount": amount,
            "counterparty": None,
        }
        return {"parsed": {}, "ctpty": empty_ctpty}

    parsed_result, parser_type = route_to_parser(narrative)

    extraction_result = extract_payor_payee(
        parsed=parsed_result,
        amount=amount,
        narrative=narrative,
    )

    # The parser type is recorded on the parsed dict only after extraction,
    # so the extraction step never sees a "parser_type" key.
    parsed_result["parser_type"] = parser_type

    return {"parsed": parsed_result, "ctpty": extraction_result}
@@ -0,0 +1,124 @@
1
+ import re
2
+ # spaCy is not used; entity cleaning is implemented with plain Python logic.
3
+
4
# Tokens treated as company legal-form suffixes when locating an
# organisation name (compared upper-cased by the caller).
LEGAL_SUFFIXES = {
    "LLC",
    "L.L.C",
    "INC",
    "INC.",
    "LTD",
    "LTD.",
    "LLP",
    "CORP",
    "CORPORATION",
    "CO",
    "CO.",
    "COMPANY",
    "HOLDINGS",
    "GROUP",
    "PLC",
}

# Matches an "O/" marker, optionally followed by digits and a second slash.
O_MARKER_RE = re.compile(r"O/\d*/|O/")

# Placeholder tokens that stand for "no value provided".
STOP_WORDS = {
    "NOTPROVIDED",
    "NA",
    "N/A",
    "UNKNOWN",
    "UNAVAILABLE",
}
20
+
21
def is_garbage_token(t: str) -> bool:
    """Return True when a token cannot be part of an entity name.

    A token is garbage when, after stripping surrounding whitespace, it is
    empty, contains a run of five or more consecutive digits, or contains
    no ASCII letter at all.
    """
    token = t.strip()
    if not token:
        return True

    has_long_digit_run = re.search(r"\d{5,}", token) is not None
    has_letter = re.search(r"[A-Za-z]", token) is not None
    return has_long_digit_run or not has_letter
30
+
31
def _is_leading_noise(token):
    """Tell whether a leading token is numeric noise to be skipped.

    True when the token has digits but no letters, or when it contains
    more than four digits in total.
    """
    digits_without_letters = (
        any(ch.isdigit() for ch in token)
        and not any(ch.isalpha() for ch in token)
    )
    return digits_without_letters or len(re.findall(r"\d", token)) > 4


def _trailing_clean_name(tokens):
    """Join the run of non-garbage tokens at the END of *tokens*.

    Walks backwards and stops at the first garbage token. Returns ``None``
    when the last token is garbage or *tokens* is empty.
    """
    kept = []
    for tok in reversed(tokens):
        if is_garbage_token(tok):
            break
        kept.append(tok)
    if not kept:
        return None
    kept.reverse()
    return " ".join(kept)


def getEntity(text):
    """Extract a best-effort entity name from a noisy text fragment.

    Strategies are tried in order and the first hit wins:

    1. Organisation ending in a legal suffix (rightmost suffix token).
    2. Name immediately before an ``O/`` marker.
    3. Name immediately before a stop word or a lone ``/`` token.
    4. Fallback: every non-garbage token, joined with spaces.

    Returns ``None`` for empty/blank input or when nothing usable remains.
    """
    if not text or not text.strip():
        return None

    # Light normalisation: "*" and "-" act as separators; collapse spaces.
    for delim in ("*", "-"):
        text = text.replace(delim, " ")
    text = re.sub(r"\s+", " ", text).strip()

    # Drop numeric noise from the front of the token list.
    tokens = text.split()
    first = 0
    while first < len(tokens) and _is_leading_noise(tokens[first]):
        first += 1
    tokens = tokens[first:]
    if not tokens:
        return None
    text = " ".join(tokens)

    # 1. Legal suffix, scanning from the right; collect the tokens to its
    #    left until a stop word, a slash-bearing token or a digit-heavy token.
    for idx in reversed(range(len(tokens))):
        if tokens[idx].upper().strip(".,") not in LEGAL_SUFFIXES:
            continue
        name_parts = []
        for tok in reversed(tokens[: idx + 1]):
            if tok.upper() in STOP_WORDS or "/" in tok:
                break
            if len(re.findall(r"\d", tok)) > 4:
                break
            name_parts.append(tok)
        if name_parts:
            name_parts.reverse()
            return " ".join(name_parts)

    # 2. Name right before an "O/" marker.
    marker = O_MARKER_RE.search(text)
    if marker:
        name = _trailing_clean_name(text[: marker.start()].strip().split())
        if name is not None:
            return name

    # 3. Name right before a stop word or a standalone slash.
    for idx, tok in enumerate(tokens):
        if tok.upper() in STOP_WORDS or tok == "/":
            name = _trailing_clean_name(tokens[:idx])
            if name is not None:
                return name

    # 4. Fallback: keep every non-garbage token, wherever it appears.
    remaining = [tok for tok in tokens if not is_garbage_token(tok)]
    if remaining:
        return " ".join(remaining)

    return None
117
+
118
+ # def getEntity_spacy_original(text):
119
+ # if not text or not text.strip():
120
+ # return None
121
+ #
122
+ # # ... (original spacy logic commented out)
123
+ # doc = nlp(text)
124
+ # # ...
@@ -0,0 +1,160 @@
1
+ import re
2
+ from typing import Any, Dict, Optional
3
+ from counterparty.util import norm2
4
+ from counterparty.extraction.infer_counterparty import infer_counterparty
5
+ from counterparty.extraction.clean import getEntity
6
+
7
+
8
# Lookup tables used after parsing to decide which parsed field names the
# payer, the payee, or a generic counterparty. Order matters: the lists are
# scanned front to back and the first key with a usable value wins.
PAYER_KEYS = [
    "ordering customer",
    "sending co name",
    "ordering cust",
    "company name",
    "sender name",
    "debtor name",
    "from account",
    "comp name",
    "entry desc",
    "orig co name",
    "from acct",
    "originator",
    "debtor",
    "sender",
    "comp name",  # listed twice in the original table; kept as-is
    "orig",
    "org",
]

PAYEE_KEYS = [
    "individual or receiving company name",
    "receiver name",
    "creditor name",
    "customer name",
    "ulti bene",
    "recv name",
    "beneficiary",
    "cust name",
    "creditor",
    "receiver",
    "bn f",
    "bnf",
    "bn",
]

COUNTERPARTY_KEYS = [
    "entity",
    "counterparty_name",
    "related entity",
    "related party",
    "from_account",
    "to_account",
    "entity_name",
    "counterparty",
    "original_counterparty",
]
19
+
20
+
21
def is_account_like(v: str) -> bool:
    """Return True when *v* looks like an account identifier, not a name.

    A value is account-like when it contains at least one digit and at most
    two letters. Letters are counted case-insensitively so that a lower- or
    mixed-case name containing a digit (e.g. ``"acme corp 1"``) is not
    mistaken for an account number.

    Args:
        v: Candidate value; falsy values (``None``, ``""``) are never
            account-like.

    Returns:
        ``True`` if the value has a digit and no more than two letters.
    """
    if not v:
        return False

    has_digit = re.search(r"\d", v) is not None
    # Count both cases: counting only "[A-Z]" made every lower-case string
    # with a digit look "mostly non-alphabetic".
    letter_count = len(re.findall(r"[A-Za-z]", v))
    return has_digit and letter_count <= 2
28
+
29
+
30
def _finalize(payer, payee, amount, narrative):
    """Clean both party names and assemble the extraction result dict.

    Each party is passed through ``getEntity``; when the cleaner yields
    nothing (a falsy result) the value is kept as given. The counterparty
    is then derived from the cleaned parties via ``infer_counterparty``.

    Returns:
        Dict with keys ``payer``, ``payee``, ``counterparty`` and ``amount``.
    """
    cleaned_payer = getEntity(payer) or payer
    cleaned_payee = getEntity(payee) or payee

    counterparty = infer_counterparty(
        cleaned_payer, cleaned_payee, amount, narrative
    )

    return {
        "payer": cleaned_payer,
        "payee": cleaned_payee,
        "counterparty": counterparty,
        "amount": amount,
    }
44
+
45
+
46
def _field_value(raw: Any) -> Any:
    """Return ``raw["value"]`` for dict-shaped fields, else ``raw`` unchanged."""
    return raw.get("value") if isinstance(raw, dict) else raw


def _first_party(data: Dict[str, Any], keys) -> Any:
    """Return the normalized value of the first key in *keys* that is usable.

    Keys are tried in order. The first present key whose normalized value
    is truthy wins. If keys are present but none yields a truthy value, the
    last normalized (falsy) value is returned; if no key is present at all,
    ``None`` is returned.
    """
    found = None
    for key in keys:
        if key in data:
            found = norm2(_field_value(data[key]))
            if found:
                break
    return found


def extract_payor_payee(
    parsed: Dict[str, Any],
    amount: Optional[float] = None,
    narrative: Optional[str] = None,
) -> Dict[str, Any]:
    """Decide who the payer and the payee are from a parsed narrative.

    Rules are applied in order; the first that produces a result returns:

    1. Explicit payer / payee fields (``PAYER_KEYS`` / ``PAYEE_KEYS``);
       account-like values are wrapped as ``BANK(<value>)``.
    2. "ACH ... RECEIVED" narratives: ``cust name`` / ``comp name`` are
       assigned by the DEBIT / CREDIT keyword, overriding rule 1.
       2b. "ACH ... DISBURSEMENT ... DEBIT": receiver pays the company.
    3. / 4. Whatever rule 1 found (both, or only one side).
    5. PIX narratives: name following the ``PIX`` keyword.
    6. Generic counterparty fields, with direction taken from *amount*.
    7. / 8. Nothing resolved: both parties ``None``.

    Field values may be plain values or ``{"value": ...}`` dicts; every
    rule unwraps them the same way before normalizing.

    Args:
        parsed: Parser output; keys are matched case-insensitively.
        amount: Optional signed amount (negative is treated as outgoing).
        narrative: Optional original narrative text.

    Returns:
        The dict built by ``_finalize`` (payer, payee, counterparty, amount).
    """
    data = {k.lower(): v for k, v in parsed.items()}

    # Rule 1: structured explicit payer / payee fields.
    payer = _first_party(data, PAYER_KEYS)
    payee = _first_party(data, PAYEE_KEYS)

    if payer and is_account_like(payer):
        payer = f"BANK({payer})"

    if payee and is_account_like(payee):
        payee = f"BANK({payee})"

    # Rule 2: ACH RECEIVED override.
    ach_text = norm2(narrative) or norm2(data.get("raw")) or ""
    ach_u = ach_text.upper()
    is_ach = "ACH" in ach_u

    if is_ach and "RECEIVED" in ach_u:
        # Unwrap dict-shaped fields here too, exactly as rule 1 does for
        # the very same keys.
        cust = norm2(_field_value(data.get("cust name")))
        comp = norm2(_field_value(data.get("comp name")))

        # Deliberately not gated on both names being present.
        if "DEBIT" in ach_u:
            return _finalize(cust, comp, amount, narrative)
        if "CREDIT" in ach_u:
            return _finalize(comp, cust, amount, narrative)

    # Rule 2b: ACH disbursement funding debit.
    if is_ach and "DISBURSEMENT" in ach_u and "DEBIT" in ach_u:
        comp = norm2(
            _field_value(data.get("comp name") or data.get("sending co name"))
        )
        recv = norm2(
            _field_value(
                data.get("recv name")
                or data.get("receiver name")
                or data.get("cust name")
            )
        )

        # Customer paid out: customer is payer, company is payee.
        return _finalize(recv, comp, amount, narrative)

    # Rules 3 and 4: at least one role known from rule 1. A missing side
    # is always reported as None.
    if payer or payee:
        return _finalize(payer or None, payee or None, amount, narrative)

    # Rule 5: PIX inference from the parsed narrative / description.
    narrative_text = norm2(parsed.get("narrative") or parsed.get("description"))

    if narrative_text and re.search(r"\bPIX\b", narrative_text, re.IGNORECASE):
        narrative_upper = narrative_text.upper()
        m = re.search(
            r"\bPIX(?:\s+QRS|\s+TRANSF|\s+QR)?\s+([A-Z][A-Z\s]{2,})",
            narrative_upper,
        )

        if m:
            ctpty = norm2(m.group(1))
            ctpty = re.sub(r"\s+\d.*$", "", ctpty).strip()

            if ctpty and not is_account_like(ctpty):
                # Receipt / credit markers mean the named party paid us.
                if re.search(r"\b(RECEB|RECEBIDO|CR|CRED)\b", narrative_upper):
                    return _finalize(ctpty, None, amount, narrative)

                return _finalize(None, ctpty, amount, narrative)

    # Rule 6: generic counterparty fields.
    ctpty = _first_party(data, COUNTERPARTY_KEYS)

    if ctpty:
        if is_account_like(ctpty):
            ctpty = f"BANK({ctpty})"

        # A non-negative amount makes the counterparty the payer; a
        # negative or unknown amount makes it the payee.
        if amount is not None and amount >= 0:
            return _finalize(ctpty, None, amount, narrative)

        return _finalize(None, ctpty, amount, narrative)

    # Rules 7 and 8: nothing resolved, with or without an amount.
    return _finalize(None, None, amount, narrative)
@@ -0,0 +1,17 @@
1
def infer_counterparty(payer, payee, amount, narrative):
    """Pick which party is the counterparty of the transaction.

    For narratives containing "ACH" (case-insensitive), a "CREDIT" keyword
    selects the payer and a "DEBIT" keyword selects the payee, with CREDIT
    checked first. Otherwise the sign of *amount* decides: positive selects
    the payer, negative the payee. A zero or missing amount yields ``None``.
    """
    upper_text = narrative.upper() if narrative else ""

    if "ACH" in upper_text:
        for keyword, party in (("CREDIT", payer), ("DEBIT", payee)):
            if keyword in upper_text:
                return party

    if amount is None:
        return None
    if amount > 0:
        return payer
    if amount < 0:
        return payee
    return None