PyPI - counterparty - Versions diffs - 0.1.6__tar.gz - Mend

counterparty 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

counterparty-0.1.6/PKG-INFO ADDED Viewed

@@ -0,0 +1,9 @@
+Metadata-Version: 2.4
+Name: counterparty
+Version: 0.1.6
+Summary: A library for counterparty extraction and narrative parsing.
+Requires-Python: >=3.9
+Requires-Dist: rapidfuzz>=2.0.0
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

counterparty-0.1.6/README.md ADDED Viewed

@@ -0,0 +1,70 @@
+# Counterparty Extraction Library
+A Python library designed to parse financial transaction narratives and extract key counterparty information (Payer, Payee, and Counterparty Name).
+## Features
+*   **Narrative Parsing**: Automatically detects transaction types (Wire, ACH, Check, etc.).
+*   **Entity Extraction**: Identifies the 'Payer' and 'Payee' from complex strings.
+*   **Clean Output**: Returns a structured, JSON-like nested dictionary that separates parsing metadata from extraction results.
+## Installation
+You can install this package locally using `pip`.
+### For Development (Editable Mode)
+Recommended if you plan to modify the code.
+```bash
+git clone <repository-url>
+cd counterparty
+pip install -e .
+```
+### Standard Install
+```bash
+cd counterparty
+pip install .
+```
+## Usage
+The package provides a simple entry point `get_counterparty`.
+```python
+from counterparty import get_counterparty
+# 1. Define your transaction details
+narrative = "WIRE TRANSFER. Orig : GOOGLE INC"
+amount = 100.00  # Optional, but helps infer direction (Credit vs Debit)
+# 2. Extract info
+result = get_counterparty(narrative, amount=amount)
+# 3. Use the result
+print(result)
+```
+## Output Structure
+The output is a nested dictionary with two main sections:
+*   `parsed`: Contains the raw parsing details and metadata (e.g., transaction type).
+*   `ctpty`: Contains the extracted entity information.
+**Example Output:**
+```json
+{
+    "parsed": {
+        "RAW": "WIRE TRANSFER. Orig: GOOGLE INC",
+        "ORIG": "GOOGLE INC",
+        "META": "WIRE TRANSFER",
+        "parser_type": "wire"
+    },
+    "ctpty": {
+        "payer": "GOOGLE INC",
+        "payee": null,
+        "counterparty": "GOOGLE INC",
+        "amount": 100.0
+    }
+}
+```

counterparty-0.1.6/pyproject.toml ADDED Viewed

@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"

counterparty-0.1.6/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

counterparty-0.1.6/setup.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Packaging script for the ``counterparty`` distribution (setuptools-based).
+from setuptools import setup, find_packages
+setup(
+    name="counterparty",
+    version="0.1.6",
+    description="A library for counterparty extraction and narrative parsing.",
+    # Packages live under ``src/``, so package discovery starts from there.
+    package_dir={"": "src"},
+    packages=find_packages(where="src"),
+    # Ship the JSON files found in ``counterparty/key_engine`` with the package.
+    package_data={
+        "counterparty": ["key_engine/*.json"],
+    },
+    python_requires=">=3.9",
+    install_requires=[
+        "rapidfuzz>=2.0.0",
+    ],
+)

counterparty-0.1.6/src/counterparty/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+from counterparty.route import route_to_parser
+from counterparty.extraction.extract_payer_payee import extract_payor_payee
+from counterparty.util import normalize_spaces
+def get_counterparty(narrative: str, amount: float = None) -> dict:
+    """Parse a transaction narrative and extract payer/payee/counterparty.
+    Returns a dict with two sections: ``"parsed"`` (the parser output plus a
+    ``"parser_type"`` entry) and ``"ctpty"`` (payer, payee, counterparty and
+    the amount passed in). A falsy narrative short-circuits to an empty
+    ``"parsed"`` section and an all-``None`` ``"ctpty"`` section.
+    """
+    if not narrative:
+        # Nothing to parse: echo the amount back with no entities resolved.
+        empty_ctpty = {
+            "payer": None,
+            "payee": None,
+            "amount": amount,
+            "counterparty": None,
+        }
+        return {"parsed": {}, "ctpty": empty_ctpty}
+    parsed_result, parser_type = route_to_parser(narrative)
+    ctpty = extract_payor_payee(
+        parsed=parsed_result,
+        amount=amount,
+        narrative=narrative,
+    )
+    # The parser type is recorded only after extraction, and it is written
+    # into the very dict returned by the parser (mutated in place).
+    parsed_result["parser_type"] = parser_type
+    return {"parsed": parsed_result, "ctpty": ctpty}

counterparty-0.1.6/src/counterparty/extraction/__init__.py ADDED Viewed

File without changes

counterparty-0.1.6/src/counterparty/extraction/clean.py ADDED Viewed

@@ -0,0 +1,124 @@
+import re
+# spaCy is not used here; the entity extraction is implemented in plain Python.
+# Company-form suffixes that mark the end of an organisation name.
+LEGAL_SUFFIXES = {
+    "LLC",
+    "L.L.C",
+    "INC",
+    "INC.",
+    "LTD",
+    "LTD.",
+    "LLP",
+    "CORP",
+    "CORPORATION",
+    "CO",
+    "CO.",
+    "COMPANY",
+    "HOLDINGS",
+    "GROUP",
+    "PLC",
+}
+# Matches an "O/" marker, optionally followed by digits and a closing slash
+# (for example "O/" or "O/12345/").
+O_MARKER_RE = re.compile(r"O/\d*/|O/")
+# Placeholder words that stand in for a missing value rather than a name.
+STOP_WORDS = {
+    "NOTPROVIDED",
+    "NA",
+    "N/A",
+    "UNKNOWN",
+    "UNAVAILABLE",
+}
+def is_garbage_token(t: str) -> bool:
+    """Return True when a token cannot be part of an entity name.
+    A token is garbage when it is blank, contains a run of five or more
+    consecutive digits, or contains no ASCII letter at all.
+    """
+    token = t.strip()
+    if not token:
+        return True
+    has_long_digit_run = re.search(r"\d{5,}", token) is not None
+    has_letter = re.search(r"[A-Za-z]", token) is not None
+    return has_long_digit_run or not has_letter
+def _collect_trailing_name(tokens):
+    """Join the run of non-garbage tokens at the end of *tokens*.
+    Walks the list from right to left, stops at the first token rejected by
+    ``is_garbage_token``, and returns the kept tokens in their original order
+    joined by single spaces. Returns ``None`` when no token was kept.
+    """
+    kept = []
+    for tok in reversed(tokens):
+        if is_garbage_token(tok):
+            break
+        kept.append(tok)
+    if not kept:
+        return None
+    kept.reverse()
+    return " ".join(kept)
+def getEntity(text):
+    """Extract an entity name from a noisy free-text field.
+    After light normalisation the following strategies are tried in order and
+    the first one that yields a name wins:
+      1. tokens ending at the right-most legal suffix (LLC, INC, ...);
+      2. the clean tokens immediately before an "O/" marker;
+      3. the clean tokens immediately before a stop word or a lone "/";
+      4. every token that is not garbage, joined together.
+    Returns ``None`` for blank input or when nothing usable remains.
+    """
+    if not text or not text.strip():
+        return None
+    # Normalisation 1: treat "*" and "-" as word separators.
+    text = text.replace("*", " ").replace("-", " ")
+    text = re.sub(r"\s+", " ", text).strip()
+    # Normalisation 2: drop leading tokens that are numeric without any letter
+    # or that carry more than four digits.
+    tokens = text.split()
+    first_kept = 0
+    for tok in tokens:
+        numeric_no_letters = any(ch.isdigit() for ch in tok) and not any(
+            ch.isalpha() for ch in tok
+        )
+        if not numeric_no_letters and len(re.findall(r"\d", tok)) <= 4:
+            break
+        first_kept += 1
+    tokens = tokens[first_kept:]
+    if not tokens:
+        return None
+    text = " ".join(tokens)
+    # 1. Organisation via legal suffix: scan from the right for a suffix, then
+    #    extend leftwards until a stop word, a slash or a digit-heavy token.
+    for idx in reversed(range(len(tokens))):
+        if tokens[idx].upper().strip(".,") not in LEGAL_SUFFIXES:
+            continue
+        name_parts = []
+        for tok in reversed(tokens[: idx + 1]):
+            if tok.upper() in STOP_WORDS or "/" in tok:
+                break
+            if len(re.findall(r"\d", tok)) > 4:
+                break
+            name_parts.append(tok)
+        if name_parts:
+            name_parts.reverse()
+            return " ".join(name_parts)
+    # 2. Name directly before an "O/" marker.
+    marker = O_MARKER_RE.search(text)
+    if marker:
+        name = _collect_trailing_name(text[: marker.start()].strip().split())
+        if name is not None:
+            return name
+    # 3. Name directly before a stop word or a stand-alone slash.
+    for idx, tok in enumerate(tokens):
+        if tok.upper() in STOP_WORDS or tok == "/":
+            name = _collect_trailing_name(tokens[:idx])
+            if name is not None:
+                return name
+    # 4. Fallback: keep every non-garbage token, wherever it sits.
+    survivors = [tok for tok in tokens if not is_garbage_token(tok)]
+    return " ".join(survivors) if survivors else None
+# def getEntity_spacy_original(text):
+#     if not text or not text.strip():
+#         return None
+#
+#     # ... (original spacy logic commented out)
+#     doc = nlp(text)
+#     # ...

counterparty-0.1.6/src/counterparty/extraction/extract_payer_payee.py ADDED Viewed

@@ -0,0 +1,160 @@
+import re
+from typing import Any, Dict, Optional
+from counterparty.util import norm2
+from counterparty.extraction.infer_counterparty import infer_counterparty
+from counterparty.extraction.clean import getEntity
+# Lower-case field names used to decide, after parsing, who is the payer and
+# who is the payee. Each list is scanned in order, so earlier keys take
+# precedence over later ones.
+PAYER_KEYS = [
+    "ordering customer",
+    "sending co name",
+    "ordering cust",
+    "company name",
+    "sender name",
+    "debtor name",
+    "from account",
+    "comp name",
+    "entry desc",
+    "orig co name",
+    "from acct",
+    "originator",
+    "debtor",
+    "sender",
+    "orig",
+    "org",
+]
+PAYEE_KEYS = [
+    "individual or receiving company name",
+    "receiver name",
+    "creditor name",
+    "customer name",
+    "ulti bene",
+    "recv name",
+    "beneficiary",
+    "cust name",
+    "creditor",
+    "receiver",
+    "bn f",
+    "bnf",
+    "bn",
+]
+# Generic fields that name the other party without saying which role it has.
+COUNTERPARTY_KEYS = [
+    "entity",
+    "counterparty_name",
+    "related entity",
+    "related party",
+    "from_account",
+    "to_account",
+    "entity_name",
+    "counterparty",
+    "original_counterparty",
+]
+def is_account_like(v: str) -> bool:
+    """Return True when *v* looks like an account identifier, not a name.
+    A value is account-like when it contains at least one digit and at most
+    two ASCII letters, e.g. ``"12345678"`` or ``"AB123"``. Empty or ``None``
+    input is never account-like.
+    """
+    if not v:
+        return False
+    has_digit = re.search(r"\d", v) is not None
+    # Count letters case-insensitively: counting only upper-case letters would
+    # classify mixed-case names such as "Acme Supplies 2" as accounts.
+    letter_count = len(re.findall(r"[A-Za-z]", v))
+    return has_digit and letter_count <= 2
+def _finalize(payer, payee, amount, narrative):
+    """Clean both party names and assemble the extraction result.
+    Each name is passed through ``getEntity``; when that yields nothing the
+    value given by the caller is kept unchanged. The counterparty is then
+    inferred from the cleaned names, the amount and the narrative.
+    """
+    cleaned_payer = getEntity(payer)
+    if not cleaned_payer:
+        cleaned_payer = payer
+    cleaned_payee = getEntity(payee)
+    if not cleaned_payee:
+        cleaned_payee = payee
+    result = {"payer": cleaned_payer, "payee": cleaned_payee}
+    result["counterparty"] = infer_counterparty(
+        cleaned_payer, cleaned_payee, amount, narrative
+    )
+    result["amount"] = amount
+    return result
+def extract_payor_payee(
+    parsed: Dict[str, Any],
+    amount: Optional[float] = None,
+    narrative: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Resolve payer and payee from the fields of a parsed narrative.
+    Rules are applied in order and the first one that resolves returns:
+      1. explicit payer / payee fields (``PAYER_KEYS`` / ``PAYEE_KEYS``);
+      2. "ACH ... RECEIVED" override using "cust name" / "comp name";
+      2b. "ACH ... DISBURSEMENT ... DEBIT" override;
+      3-4. whatever Rule 1 found (both roles, or only one);
+      5. PIX narratives;
+      6. generic counterparty fields, with the role chosen by amount sign;
+      7. nothing resolved.
+    Args:
+        parsed: Parser output; keys are matched case-insensitively.
+        amount: Signed transaction amount, if known.
+        narrative: Original narrative text, if available.
+    Returns:
+        The dict built by ``_finalize`` with the keys ``payer``, ``payee``,
+        ``counterparty`` and ``amount``.
+    """
+    data = {k.lower(): v for k, v in parsed.items()}
+    def first_value(keys):
+        # Scan keys in priority order; a field may be a plain value or a dict
+        # carrying the text under "value". Stops at the first truthy result.
+        value = None
+        for key in keys:
+            if key in data:
+                raw = data[key]
+                value = norm2(raw.get("value") if isinstance(raw, dict) else raw)
+                if value:
+                    break
+        return value
+    # Rule 1: structured explicit payer / payee fields.
+    payer = first_value(PAYER_KEYS)
+    payee = first_value(PAYEE_KEYS)
+    if payer and is_account_like(payer):
+        payer = f"BANK({payer})"
+    if payee and is_account_like(payee):
+        payee = f"BANK({payee})"
+    # Rule 2: ACH RECEIVED override. Direction comes from the DEBIT / CREDIT
+    # keyword; with neither keyword present the later rules decide.
+    ach_text = norm2(narrative) or norm2(data.get("raw")) or ""
+    ach_u = ach_text.upper()
+    if "ACH" in ach_u and "RECEIVED" in ach_u:
+        cust = norm2(data.get("cust name"))
+        comp = norm2(data.get("comp name"))
+        if "DEBIT" in ach_u:
+            return _finalize(cust, comp, amount, narrative)
+        if "CREDIT" in ach_u:
+            return _finalize(comp, cust, amount, narrative)
+    # Rule 2b: ACH disbursement funding debit. The receiver is reported as the
+    # payer and the company as the payee, even when either one is missing.
+    if "ACH" in ach_u and "DISBURSEMENT" in ach_u and "DEBIT" in ach_u:
+        comp = norm2(data.get("comp name") or data.get("sending co name"))
+        recv = norm2(
+            data.get("recv name") or data.get("receiver name") or data.get("cust name")
+        )
+        return _finalize(recv, comp, amount, narrative)
+    # Rules 3 and 4: at least one role came out of Rule 1; an unresolved role
+    # is reported as None.
+    if payer or payee:
+        return _finalize(payer or None, payee or None, amount, narrative)
+    # Rule 5: PIX inference.
+    # NOTE(review): this reads the original-case "narrative" / "description"
+    # keys of `parsed`, not the `narrative` argument - confirm this is intended.
+    narrative_text = norm2(parsed.get("narrative") or parsed.get("description"))
+    if narrative_text and re.search(r"\bPIX\b", narrative_text, re.IGNORECASE):
+        narrative_upper = narrative_text.upper()
+        m = re.search(
+            r"\bPIX(?:\s+QRS|\s+TRANSF|\s+QR)?\s+([A-Z][A-Z\s]{2,})",
+            narrative_upper,
+        )
+        if m:
+            ctpty = norm2(m.group(1))
+            ctpty = re.sub(r"\s+\d.*$", "", ctpty).strip()
+            if ctpty and not is_account_like(ctpty):
+                # A received / credit keyword makes the named party the payer.
+                if re.search(r"\b(RECEB|RECEBIDO|CR|CRED)\b", narrative_upper):
+                    return _finalize(ctpty, None, amount, narrative)
+                return _finalize(None, ctpty, amount, narrative)
+    # Rule 6: generic counterparty fields.
+    ctpty = None
+    for key in COUNTERPARTY_KEYS:
+        if key in data:
+            ctpty = norm2(data[key])
+            if ctpty:
+                break
+    if ctpty:
+        if is_account_like(ctpty):
+            ctpty = f"BANK({ctpty})"
+        if amount is not None:
+            # Negative amount: the counterparty is the payee; otherwise payer.
+            if amount < 0:
+                return _finalize(None, ctpty, amount, narrative)
+            if amount >= 0:
+                return _finalize(ctpty, None, amount, narrative)
+        return _finalize(None, ctpty, amount, narrative)
+    # Rule 7: nothing resolved. The amount alone cannot name a party, so the
+    # result is the same whatever its sign.
+    return _finalize(None, None, amount, narrative)

counterparty-0.1.6/src/counterparty/extraction/infer_counterparty.py ADDED Viewed

@@ -0,0 +1,17 @@
+def infer_counterparty(payer, payee, amount, narrative):
+    """Pick which of payer / payee is the counterparty.
+    For narratives containing "ACH", a "CREDIT" keyword selects the payer and
+    a "DEBIT" keyword selects the payee (CREDIT is checked first). Otherwise
+    the sign of the amount decides: positive selects the payer, negative the
+    payee. Returns ``None`` when neither signal is available, including an
+    amount of zero.
+    """
+    upper_narrative = narrative.upper() if narrative else ""
+    if "ACH" in upper_narrative:
+        for keyword, party in (("CREDIT", payer), ("DEBIT", payee)):
+            if keyword in upper_narrative:
+                return party
+    if amount is None:
+        return None
+    if amount > 0:
+        return payer
+    if amount < 0:
+        return payee
+    return None

counterparty-0.1.6/src/counterparty/key_engine/__init__.py ADDED Viewed

File without changes