ledgerline 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ledgerline/__init__.py +7 -0
- ledgerline/accounts.py +60 -0
- ledgerline/categorize.py +195 -0
- ledgerline/cli.py +457 -0
- ledgerline/connectors/__init__.py +0 -0
- ledgerline/connectors/simplefin.py +314 -0
- ledgerline/db.py +64 -0
- ledgerline/demo.py +195 -0
- ledgerline/ingest/__init__.py +178 -0
- ledgerline/ingest/csv_generic.py +41 -0
- ledgerline/ingest/ofx.py +62 -0
- ledgerline/ingest/profiles.py +35 -0
- ledgerline/ingest/types.py +24 -0
- ledgerline/llm.py +23 -0
- ledgerline/mcp_server.py +729 -0
- ledgerline/migrations/001_init.sql +67 -0
- ledgerline/migrations/002_account_balances.sql +3 -0
- ledgerline/migrations/003_recurring_scope.sql +4 -0
- ledgerline/migrations/004_sync_state.sql +4 -0
- ledgerline/migrations/005_account_context.sql +7 -0
- ledgerline/migrations/006_account_analysis_treatment.sql +4 -0
- ledgerline/money.py +30 -0
- ledgerline/normalize.py +84 -0
- ledgerline/query.py +294 -0
- ledgerline/recurring.py +209 -0
- ledgerline-0.3.0.dist-info/METADATA +290 -0
- ledgerline-0.3.0.dist-info/RECORD +30 -0
- ledgerline-0.3.0.dist-info/WHEEL +4 -0
- ledgerline-0.3.0.dist-info/entry_points.txt +3 -0
- ledgerline-0.3.0.dist-info/licenses/LICENSE +21 -0
ledgerline/__init__.py
ADDED
ledgerline/accounts.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Durable account metadata used to interpret financial activity."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
PURPOSES = ("personal", "business", "mixed", "unknown")
|
|
7
|
+
ANALYSIS_TREATMENTS = ("include", "monitor_only", "exclude")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def set_context(
|
|
11
|
+
conn: sqlite3.Connection,
|
|
12
|
+
account_name: str,
|
|
13
|
+
*,
|
|
14
|
+
purpose: str | None = None,
|
|
15
|
+
entity_name: str | None = None,
|
|
16
|
+
business_use_percent: int | None = None,
|
|
17
|
+
context_note: str | None = None,
|
|
18
|
+
analysis_treatment: str | None = None,
|
|
19
|
+
) -> dict[str, Any]:
|
|
20
|
+
"""Update interpretive metadata without changing bank-sourced fields."""
|
|
21
|
+
row = conn.execute("SELECT * FROM accounts WHERE name = ?", (account_name,)).fetchone()
|
|
22
|
+
if not row:
|
|
23
|
+
raise ValueError(f"unknown account: {account_name}")
|
|
24
|
+
if purpose is not None and purpose not in PURPOSES:
|
|
25
|
+
raise ValueError(f"purpose must be one of {', '.join(PURPOSES)}")
|
|
26
|
+
if business_use_percent is not None and not 0 <= business_use_percent <= 100:
|
|
27
|
+
raise ValueError("business_use_percent must be between 0 and 100")
|
|
28
|
+
if analysis_treatment is not None and analysis_treatment not in ANALYSIS_TREATMENTS:
|
|
29
|
+
raise ValueError(
|
|
30
|
+
f"analysis_treatment must be one of {', '.join(ANALYSIS_TREATMENTS)}"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
updates: dict[str, object | None] = {}
|
|
34
|
+
if purpose is not None:
|
|
35
|
+
updates["purpose"] = purpose
|
|
36
|
+
if business_use_percent is None:
|
|
37
|
+
if purpose == "personal":
|
|
38
|
+
updates["business_use_percent"] = 0
|
|
39
|
+
elif purpose == "business":
|
|
40
|
+
updates["business_use_percent"] = 100
|
|
41
|
+
elif purpose == "unknown":
|
|
42
|
+
updates["business_use_percent"] = None
|
|
43
|
+
if entity_name is not None:
|
|
44
|
+
updates["entity_name"] = entity_name.strip() or None
|
|
45
|
+
if business_use_percent is not None:
|
|
46
|
+
updates["business_use_percent"] = business_use_percent
|
|
47
|
+
if context_note is not None:
|
|
48
|
+
updates["context_note"] = context_note.strip() or None
|
|
49
|
+
if analysis_treatment is not None:
|
|
50
|
+
updates["analysis_treatment"] = analysis_treatment
|
|
51
|
+
if not updates:
|
|
52
|
+
raise ValueError("provide at least one account metadata field to update")
|
|
53
|
+
|
|
54
|
+
assignments = ", ".join(f"{column} = ?" for column in updates)
|
|
55
|
+
conn.execute(
|
|
56
|
+
f"UPDATE accounts SET {assignments} WHERE id = ?",
|
|
57
|
+
[*updates.values(), row["id"]],
|
|
58
|
+
)
|
|
59
|
+
conn.commit()
|
|
60
|
+
return dict(conn.execute("SELECT * FROM accounts WHERE id = ?", (row["id"],)).fetchone())
|
ledgerline/categorize.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""Categorization pipeline: cache -> static rules -> batched LLM fallback.
|
|
2
|
+
|
|
3
|
+
Each unique merchant costs at most one LLM call ever (the cache). The LLM
|
|
4
|
+
sees merchant_clean strings only — no amounts, dates, or account info.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
import sqlite3
|
|
10
|
+
|
|
11
|
+
from ledgerline.llm import MODEL, require_client
|
|
12
|
+
|
|
13
|
+
# Fixed category vocabulary used throughout this module.
TAXONOMY = [
    "housing", "utilities", "groceries", "dining", "transport", "health",
    "fitness", "insurance", "subscriptions",
    "professional",  # CE courses, licensing, credentialing fees
    "travel", "shopping", "entertainment", "income", "transfers", "fees",
    "taxes", "other",
]

# Obvious cases resolved in code; everything here is written to the cache as
# source='rule' so the LLM never sees these merchants.
#
# Patterns are compiled case-insensitively and tried in list order, so an
# earlier entry shadows a later one (e.g. "uber eats" is claimed by dining
# before transport is consulted).
#
# Short tokens that also occur inside ordinary words or surnames carry \b
# anchors so they cannot fire mid-word: "hbo" in "neighborhood", "rent" in
# "Brentwood"/"current", "aldi" in "Vivaldi", "uber" in "Huber", "etsy" in
# "Betsy", "dental" in "Occidental", "zelle" in "Gazelle". A leading-only
# anchor is used where a suffix is still expected to match ("hbomax",
# "etsy.com", "dentalcare").
_RULES: list[tuple[str, str]] = [
    (r"kroger|safeway|trader joe|whole foods|\baldi\b|publix|wegmans|h-?e-?b|food lion", "groceries"),
    (r"starbucks|mcdonald|chipotle|chick-?fil|restaurant|pizza|cafe|coffee|bakery|doordash|uber eats|grubhub|taqueria|sushi", "dining"),
    (r"\buber(?! eats)|lyft|shell|chevron|exxon|marathon petro|parking|marta|mta |transit|toll", "transport"),
    (r"airbnb|hotel|marriott|hilton|hyatt|expedia|delta air|united airlines|american airlines|southwest air", "travel"),
    (r"netflix|spotify|hulu|disney\+|\bhbo|youtube premium|audible|apple\.com/bill|icloud", "subscriptions"),
    (r"comcast|xfinity|verizon|at&t|t-mobile|georgia power|duke energy|water dept|gas company|electric", "utilities"),
    (r"planet fitness|la fitness|equinox|crossfit|peloton|ymca|\bgym\b", "fitness"),
    (r"geico|state farm|progressive ins|allstate|insurance", "insurance"),
    (r"cvs|walgreens|pharmacy|\bdental|orthodont|medical|clinic|hospital|labcorp|quest diagnostics", "health"),
    (r"payroll|direct dep|salary|adp wage", "income"),
    (r"\bzelle|venmo|wire transfer|online transfer|\btransfer\b", "transfers"),
    (r"overdraft|service charge|annual fee|late fee|atm fee|interest charge|foreign transaction", "fees"),
    (r"\birs\b|us treasury|tax payment|dept of revenue", "taxes"),
    (r"\brent\b|mortgage|property mgmt|hoa dues", "housing"),
    (r"amazon|amzn|target|walmart|best buy|ikea|\betsy|ebay", "shopping"),
    (r"amc theat|cinema|ticketmaster|steam games|nintendo|playstation", "entertainment"),
]
# Compiled once at import time; consumers iterate (pattern, category) pairs.
RULES = [(re.compile(p, re.I), cat) for p, cat in _RULES]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def rule_category(merchant_clean: str) -> str | None:
    """Return the category of the first entry in RULES whose pattern is found
    anywhere in ``merchant_clean``, or None when no pattern matches."""
    hits = (label for pattern, label in RULES if pattern.search(merchant_clean))
    return next(hits, None)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def apply_cache(conn: sqlite3.Connection) -> int:
    """Copy cached merchant categories onto transactions that have none.

    Only rows whose ``category`` is NULL and whose ``merchant_clean`` has an
    entry in ``merchant_category_cache`` are updated; rows that already carry
    a category are left alone. The change is committed before returning.

    Returns:
        The UPDATE cursor's ``rowcount``.
    """
    sql = " ".join(
        (
            "UPDATE transactions SET category =",
            "(SELECT category FROM merchant_category_cache c",
            "WHERE c.merchant_clean = transactions.merchant_clean)",
            "WHERE category IS NULL AND merchant_clean IN",
            "(SELECT merchant_clean FROM merchant_category_cache)",
        )
    )
    updated = conn.execute(sql).rowcount
    conn.commit()
    return updated
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _cache_put(conn: sqlite3.Connection, merchant: str, category: str, source: str,
               confirmed: int = 0) -> None:
    """Insert the cache row for ``merchant``, or overwrite the existing one.

    On a ``merchant_clean`` conflict the stored category, source and confirmed
    flag are all replaced with the new values. Nothing is committed here; the
    caller decides when to commit.
    """
    upsert = " ".join(
        (
            "INSERT INTO merchant_category_cache (merchant_clean, category, source, confirmed)",
            "VALUES (?, ?, ?, ?)",
            "ON CONFLICT(merchant_clean) DO UPDATE SET",
            "category = excluded.category, source = excluded.source,",
            "confirmed = excluded.confirmed",
        )
    )
    values = (merchant, category, source, confirmed)
    conn.execute(upsert, values)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def categorize_rules_only(conn: sqlite3.Connection) -> tuple[int, list[str]]:
    """Run the cache and static-rule stages of the pipeline (no LLM call).

    The existing cache is applied first. Every distinct, non-NULL merchant
    that still has an uncategorized transaction is then tried against the
    static rules; matches are cached with source 'rule', and the cache is
    applied a second time.

    Returns:
        ``(applied, still_unknown)``: the summed row counts of both cache
        applications, and the merchants no rule matched, in alphabetical
        order as produced by the query's ORDER BY.
    """
    total = apply_cache(conn)

    # Materialize the result set before writing to the cache.
    pending = conn.execute(
        "SELECT DISTINCT merchant_clean FROM transactions"
        " WHERE category IS NULL AND merchant_clean IS NOT NULL"
        " ORDER BY merchant_clean"
    ).fetchall()

    unmatched: list[str] = []
    for row in pending:
        name = row["merchant_clean"]
        category = rule_category(name)
        if not category:
            unmatched.append(name)
            continue
        _cache_put(conn, name, category, "rule")
    conn.commit()

    return total + apply_cache(conn), unmatched
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def categorize_llm(conn: sqlite3.Connection, merchants: list[str]) -> int:
    """Step 3: ONE batched request for all uncached merchants of an import.

    The model sees merchant names only. Every returned category is validated
    against the taxonomy; anything outside it (and any merchant the model
    skipped) is cached as 'other'.

    Args:
        conn: Open connection holding the cache and transactions tables.
        merchants: Merchant names to classify. An empty list returns 0
            without creating a client or sending a request.

    Returns:
        The row count reported by ``apply_cache`` after the new cache entries
        have been written and committed.

    Raises:
        ValueError: The response contains no text block, or (as
            ``json.JSONDecodeError``) its text is not valid JSON.
        KeyError: The decoded payload lacks the expected keys.

    Nothing is written to the cache when the response cannot be parsed, so a
    failed call can simply be retried.
    """
    if not merchants:
        return 0
    client = require_client()
    schema = {
        "type": "object",
        "properties": {
            "assignments": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "merchant": {"type": "string"},
                        "category": {"type": "string", "enum": TAXONOMY},
                    },
                    "required": ["merchant", "category"],
                    "additionalProperties": False,
                },
            }
        },
        "required": ["assignments"],
        "additionalProperties": False,
    }
    response = client.messages.create(
        model=MODEL,
        max_tokens=16000,
        system=(
            "You categorize personal-finance merchant names into a fixed taxonomy. "
            "Assign every merchant in the list exactly one category. "
            "Use 'other' when genuinely unsure."
        ),
        messages=[
            {
                "role": "user",
                "content": "Categorize these merchants:\n"
                + json.dumps(merchants, ensure_ascii=False),
            }
        ],
        output_config={"format": {"type": "json_schema", "schema": schema}},
    )

    # A bare next() would leak StopIteration when the reply has no text block,
    # which gives the caller no hint about what went wrong. Fail with a
    # descriptive error instead, before anything is cached.
    text = next((b.text for b in response.content if b.type == "text"), None)
    if text is None:
        raise ValueError("LLM response contained no text block")
    assignments = json.loads(text)["assignments"]

    wanted = set(merchants)
    resolved: dict[str, str] = {}
    for a in assignments:
        # Belt and braces: the schema enum already constrains category, but
        # invariant says validate in code and reject anything off-taxonomy.
        # Names the model made up (not in the request) are ignored as well.
        if a["merchant"] in wanted and a["category"] in TAXONOMY:
            resolved[a["merchant"]] = a["category"]
    for m in merchants:
        # Skipped or rejected merchants still get a cache row ('other').
        _cache_put(conn, m, resolved.get(m, "other"), "llm")
    conn.commit()
    return apply_cache(conn)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def set_manual(conn: sqlite3.Connection, merchant_clean: str, category: str) -> int:
    """Record a user-chosen category for a merchant and apply it everywhere.

    The cache entry is written with source 'manual' and confirmed=1, then
    EVERY transaction for the merchant is recategorized, including ones that
    already had a category. The change is committed.

    Returns:
        The transaction UPDATE cursor's ``rowcount``.

    Raises:
        ValueError: ``category`` is not in TAXONOMY.
    """
    if category not in TAXONOMY:
        raise ValueError(f"{category!r} is not in the taxonomy")

    _cache_put(conn, merchant_clean, category, "manual", confirmed=1)

    recategorize = "UPDATE transactions SET category = ? WHERE merchant_clean = ?"
    touched = conn.execute(recategorize, (category, merchant_clean)).rowcount
    conn.commit()
    return touched
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def confirm(conn: sqlite3.Connection, merchant_clean: str) -> None:
    """Flag the cache entry for ``merchant_clean`` as confirmed and commit.

    A merchant with no cache row matches nothing, so the call is a no-op.
    """
    mark_confirmed = (
        "UPDATE merchant_category_cache SET confirmed = 1 WHERE merchant_clean = ?"
    )
    params = (merchant_clean,)
    conn.execute(mark_confirmed, params)
    conn.commit()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def unconfirmed(conn: sqlite3.Connection) -> list[sqlite3.Row]:
    """List cache entries that have not been confirmed yet.

    Each row carries ``merchant_clean``, ``category``, ``source`` and
    ``txn_count`` (the number of transactions for that merchant; 0 when there
    are none). LLM-assigned entries come first, since they are the most
    likely to need correction, and within each group the busiest merchants
    lead.
    """
    review_queue = " ".join(
        (
            "SELECT c.merchant_clean, c.category, c.source, COUNT(t.id) AS txn_count",
            "FROM merchant_category_cache c",
            "LEFT JOIN transactions t ON t.merchant_clean = c.merchant_clean",
            "WHERE c.confirmed = 0",
            "GROUP BY c.merchant_clean",
            "ORDER BY c.source = 'llm' DESC, txn_count DESC",
        )
    )
    cursor = conn.execute(review_queue)
    return cursor.fetchall()
|