ledgerline 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ledgerline/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """ledgerline — local-first personal finance tracker."""
2
+
3
# Version string of the ledgerline package.
__version__ = "0.3.0"


class LedgerlineError(Exception):
    """Failure that should be reported directly to the user.

    Intended handling: print the message and exit with a nonzero status,
    without showing a traceback.
    """
ledgerline/accounts.py ADDED
@@ -0,0 +1,60 @@
1
+ """Durable account metadata used to interpret financial activity."""
2
+
3
+ import sqlite3
4
+ from typing import Any
5
+
6
# Values accepted for an account's ``purpose`` field.
PURPOSES = (
    "personal",
    "business",
    "mixed",
    "unknown",
)
# Values accepted for an account's ``analysis_treatment`` field.
ANALYSIS_TREATMENTS = (
    "include",
    "monitor_only",
    "exclude",
)
8
+
9
+
10
+ def set_context(
11
+ conn: sqlite3.Connection,
12
+ account_name: str,
13
+ *,
14
+ purpose: str | None = None,
15
+ entity_name: str | None = None,
16
+ business_use_percent: int | None = None,
17
+ context_note: str | None = None,
18
+ analysis_treatment: str | None = None,
19
+ ) -> dict[str, Any]:
20
+ """Update interpretive metadata without changing bank-sourced fields."""
21
+ row = conn.execute("SELECT * FROM accounts WHERE name = ?", (account_name,)).fetchone()
22
+ if not row:
23
+ raise ValueError(f"unknown account: {account_name}")
24
+ if purpose is not None and purpose not in PURPOSES:
25
+ raise ValueError(f"purpose must be one of {', '.join(PURPOSES)}")
26
+ if business_use_percent is not None and not 0 <= business_use_percent <= 100:
27
+ raise ValueError("business_use_percent must be between 0 and 100")
28
+ if analysis_treatment is not None and analysis_treatment not in ANALYSIS_TREATMENTS:
29
+ raise ValueError(
30
+ f"analysis_treatment must be one of {', '.join(ANALYSIS_TREATMENTS)}"
31
+ )
32
+
33
+ updates: dict[str, object | None] = {}
34
+ if purpose is not None:
35
+ updates["purpose"] = purpose
36
+ if business_use_percent is None:
37
+ if purpose == "personal":
38
+ updates["business_use_percent"] = 0
39
+ elif purpose == "business":
40
+ updates["business_use_percent"] = 100
41
+ elif purpose == "unknown":
42
+ updates["business_use_percent"] = None
43
+ if entity_name is not None:
44
+ updates["entity_name"] = entity_name.strip() or None
45
+ if business_use_percent is not None:
46
+ updates["business_use_percent"] = business_use_percent
47
+ if context_note is not None:
48
+ updates["context_note"] = context_note.strip() or None
49
+ if analysis_treatment is not None:
50
+ updates["analysis_treatment"] = analysis_treatment
51
+ if not updates:
52
+ raise ValueError("provide at least one account metadata field to update")
53
+
54
+ assignments = ", ".join(f"{column} = ?" for column in updates)
55
+ conn.execute(
56
+ f"UPDATE accounts SET {assignments} WHERE id = ?",
57
+ [*updates.values(), row["id"]],
58
+ )
59
+ conn.commit()
60
+ return dict(conn.execute("SELECT * FROM accounts WHERE id = ?", (row["id"],)).fetchone())
@@ -0,0 +1,195 @@
1
+ """Categorization pipeline: cache -> static rules -> batched LLM fallback.
2
+
3
+ Each unique merchant costs at most one LLM call ever (the cache). The LLM
4
+ sees merchant_clean strings only — no amounts, dates, or account info.
5
+ """
6
+
7
+ import json
8
+ import re
9
+ import sqlite3
10
+
11
+ from ledgerline.llm import MODEL, require_client
12
+
13
# The closed set of category labels used throughout this module.
TAXONOMY = [
    "housing",
    "utilities",
    "groceries",
    "dining",
    "transport",
    "health",
    "fitness",
    "insurance",
    "subscriptions",
    "professional",  # CE courses, licensing, credentialing fees
    "travel",
    "shopping",
    "entertainment",
    "income",
    "transfers",
    "fees",
    "taxes",
    "other",
]
20
+
21
# Obvious cases resolved in code; everything here is written to the cache as
# source='rule' so the LLM never sees these merchants.
#
# Each entry is (regex alternation, category). Patterns are tried in order and
# the first match wins, so e.g. "uber eats" is claimed by dining before the
# transport rule is reached.
_RULES: list[tuple[str, str]] = [
    (r"kroger|safeway|trader joe|whole foods|aldi|publix|wegmans|h-?e-?b|food lion", "groceries"),
    (r"starbucks|mcdonald|chipotle|chick-?fil|restaurant|pizza|cafe|coffee|bakery|doordash|uber eats|grubhub|taqueria|sushi", "dining"),
    (r"uber(?! eats)|lyft|shell|chevron|exxon|marathon petro|parking|marta|mta |transit|toll", "transport"),
    (r"airbnb|hotel|marriott|hilton|hyatt|expedia|delta air|united airlines|american airlines|southwest air", "travel"),
    (r"netflix|spotify|hulu|disney\+|hbo|youtube premium|audible|apple\.com/bill|icloud", "subscriptions"),
    (r"comcast|xfinity|verizon|at&t|t-mobile|georgia power|duke energy|water dept|gas company|electric", "utilities"),
    (r"planet fitness|la fitness|equinox|crossfit|peloton|ymca|\bgym\b", "fitness"),
    (r"geico|state farm|progressive ins|allstate|insurance", "insurance"),
    (r"cvs|walgreens|pharmacy|dental|orthodont|medical|clinic|hospital|labcorp|quest diagnostics", "health"),
    (r"payroll|direct dep|salary|adp wage", "income"),
    (r"zelle|venmo|wire transfer|online transfer|\btransfer\b", "transfers"),
    (r"overdraft|service charge|annual fee|late fee|atm fee|interest charge|foreign transaction", "fees"),
    (r"\birs\b|us treasury|tax payment|dept of revenue", "taxes"),
    # "rent" is anchored on word boundaries: a bare substring match would also
    # claim unrelated merchants such as "Current", "Parent" or "Torrent".
    (r"\brent\b|mortgage|property mgmt|hoa dues", "housing"),
    (r"amazon|amzn|target|walmart|best buy|ikea|etsy|ebay", "shopping"),
    (r"amc theat|cinema|ticketmaster|steam games|nintendo|playstation", "entertainment"),
]
# Compiled once at import time; matching is case-insensitive.
RULES = [(re.compile(p, re.I), cat) for p, cat in _RULES]
42
+
43
+
44
def rule_category(merchant_clean: str) -> str | None:
    """Return the category of the first rule in ``RULES`` that matches.

    Rules are tried in list order using ``search`` (a match anywhere in the
    string counts). Returns ``None`` when no rule matches.
    """
    hits = (label for pattern, label in RULES if pattern.search(merchant_clean))
    return next(hits, None)
49
+
50
+
51
def apply_cache(conn: sqlite3.Connection) -> int:
    """Copy cached categories onto transactions that have none yet.

    Only rows with ``category IS NULL`` whose ``merchant_clean`` appears in
    ``merchant_category_cache`` are touched. The change is committed.

    Returns:
        The ``rowcount`` reported for the UPDATE statement.
    """
    fill_from_cache = (
        "UPDATE transactions SET category ="
        " (SELECT category FROM merchant_category_cache c"
        " WHERE c.merchant_clean = transactions.merchant_clean)"
        " WHERE category IS NULL AND merchant_clean IN"
        " (SELECT merchant_clean FROM merchant_category_cache)"
    )
    updated = conn.execute(fill_from_cache).rowcount
    conn.commit()
    return updated
62
+
63
+
64
+ def _cache_put(conn: sqlite3.Connection, merchant: str, category: str, source: str,
65
+ confirmed: int = 0) -> None:
66
+ conn.execute(
67
+ "INSERT INTO merchant_category_cache (merchant_clean, category, source, confirmed)"
68
+ " VALUES (?, ?, ?, ?)"
69
+ " ON CONFLICT(merchant_clean) DO UPDATE SET"
70
+ " category = excluded.category, source = excluded.source,"
71
+ " confirmed = excluded.confirmed",
72
+ (merchant, category, source, confirmed),
73
+ )
74
+
75
+
76
def categorize_rules_only(conn: sqlite3.Connection) -> tuple[int, list[str]]:
    """Steps 1+2 of the pipeline (no API key needed).

    First applies the existing cache, then runs every still-uncategorized
    merchant through the static rules, caching each hit with source 'rule',
    and finally applies the cache again so those hits reach transactions.

    Returns (transactions categorized, merchants still uncached).
    """
    total = apply_cache(conn)
    pending = conn.execute(
        "SELECT DISTINCT merchant_clean FROM transactions"
        " WHERE category IS NULL AND merchant_clean IS NOT NULL"
        " ORDER BY merchant_clean"
    ).fetchall()

    unmatched: list[str] = []
    for record in pending:
        name = record["merchant_clean"]
        matched = rule_category(name)
        if matched:
            _cache_put(conn, name, matched, "rule")
            continue
        unmatched.append(name)

    # _cache_put does not commit, so persist the new rule entries here.
    conn.commit()
    total += apply_cache(conn)
    return total, unmatched
100
+
101
+
102
def categorize_llm(conn: sqlite3.Connection, merchants: list[str]) -> int:
    """Step 3: ONE batched request for all uncached merchants of an import.

    The model sees merchant names only. Every returned category is validated
    against the taxonomy; anything outside it (and any merchant the model
    skipped or returned in a malformed entry) is cached as 'other'.

    Args:
        conn: Open database connection; cache writes are committed here.
        merchants: Merchant names to categorize. Duplicates are collapsed so
            each name is sent and cached once.

    Returns:
        0 when ``merchants`` is empty (no request is made); otherwise the
        value returned by ``apply_cache`` after the cache has been updated.

    Raises:
        ValueError: The response has no text block, or its text is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
            Nothing is cached in that case.
    """
    if not merchants:
        return 0
    # Preserve first-seen order while dropping repeats.
    unique = list(dict.fromkeys(merchants))
    client = require_client()
    schema = {
        "type": "object",
        "properties": {
            "assignments": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "merchant": {"type": "string"},
                        "category": {"type": "string", "enum": TAXONOMY},
                    },
                    "required": ["merchant", "category"],
                    "additionalProperties": False,
                },
            }
        },
        "required": ["assignments"],
        "additionalProperties": False,
    }
    response = client.messages.create(
        model=MODEL,
        max_tokens=16000,
        system=(
            "You categorize personal-finance merchant names into a fixed taxonomy. "
            "Assign every merchant in the list exactly one category. "
            "Use 'other' when genuinely unsure."
        ),
        messages=[
            {
                "role": "user",
                "content": "Categorize these merchants:\n"
                + json.dumps(unique, ensure_ascii=False),
            }
        ],
        output_config={"format": {"type": "json_schema", "schema": schema}},
    )
    # A default avoids leaking a bare StopIteration when no text block exists.
    text = next((b.text for b in response.content if b.type == "text"), None)
    if text is None:
        raise ValueError("categorization response contained no text block")
    assignments = json.loads(text)["assignments"]

    wanted = set(unique)
    resolved: dict[str, str] = {}
    for item in assignments:
        # Belt and braces: the schema already constrains each entry, but
        # invariant says validate in code and reject anything off-taxonomy.
        # Malformed entries are treated the same as skipped merchants.
        if not isinstance(item, dict):
            continue
        name = item.get("merchant")
        category = item.get("category")
        if isinstance(name, str) and name in wanted and category in TAXONOMY:
            resolved[name] = category
    for name in unique:
        _cache_put(conn, name, resolved.get(name, "other"), "llm")
    conn.commit()
    return apply_cache(conn)
162
+
163
+
164
def set_manual(conn: sqlite3.Connection, merchant_clean: str, category: str) -> int:
    """Record a manual category for a merchant and apply it everywhere.

    The cache entry is written with source 'manual' and confirmed=1, and
    every transaction for the merchant is recategorized, including ones that
    already had a category.

    Returns:
        The ``rowcount`` reported for the transactions UPDATE.

    Raises:
        ValueError: ``category`` is not in ``TAXONOMY``.
    """
    if category not in TAXONOMY:
        raise ValueError(f"{category!r} is not in the taxonomy")

    _cache_put(conn, merchant_clean, category, "manual", confirmed=1)
    rewritten = conn.execute(
        "UPDATE transactions SET category = ? WHERE merchant_clean = ?",
        (category, merchant_clean),
    ).rowcount
    conn.commit()
    return rewritten
176
+
177
+
178
def confirm(conn: sqlite3.Connection, merchant_clean: str) -> None:
    """Set ``confirmed = 1`` on the merchant's cache row and commit.

    A merchant with no cache row matches nothing, so the call is a no-op
    apart from the commit.
    """
    params = (merchant_clean,)
    conn.execute(
        "UPDATE merchant_category_cache SET confirmed = 1 WHERE merchant_clean = ?",
        params,
    )
    conn.commit()
184
+
185
+
186
def unconfirmed(conn: sqlite3.Connection) -> list[sqlite3.Row]:
    """List cache entries that have not been confirmed yet.

    Each row carries merchant_clean, category, source and ``txn_count`` (the
    number of transactions for that merchant, 0 if none). LLM-assigned
    entries come first (most likely to need correction), then everything
    else; within each group, higher transaction counts come first.
    """
    review_queue = (
        "SELECT c.merchant_clean, c.category, c.source, COUNT(t.id) AS txn_count"
        " FROM merchant_category_cache c"
        " LEFT JOIN transactions t ON t.merchant_clean = c.merchant_clean"
        " WHERE c.confirmed = 0"
        " GROUP BY c.merchant_clean"
        " ORDER BY c.source = 'llm' DESC, txn_count DESC"
    )
    cursor = conn.execute(review_queue)
    return cursor.fetchall()