ledgerline 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. ledgerline-0.3.0/.github/workflows/ci.yml +17 -0
  2. ledgerline-0.3.0/.gitignore +31 -0
  3. ledgerline-0.3.0/LICENSE +21 -0
  4. ledgerline-0.3.0/PKG-INFO +290 -0
  5. ledgerline-0.3.0/README.md +241 -0
  6. ledgerline-0.3.0/ledgerline/__init__.py +7 -0
  7. ledgerline-0.3.0/ledgerline/accounts.py +60 -0
  8. ledgerline-0.3.0/ledgerline/categorize.py +195 -0
  9. ledgerline-0.3.0/ledgerline/cli.py +457 -0
  10. ledgerline-0.3.0/ledgerline/connectors/__init__.py +0 -0
  11. ledgerline-0.3.0/ledgerline/connectors/simplefin.py +314 -0
  12. ledgerline-0.3.0/ledgerline/db.py +64 -0
  13. ledgerline-0.3.0/ledgerline/demo.py +195 -0
  14. ledgerline-0.3.0/ledgerline/ingest/__init__.py +178 -0
  15. ledgerline-0.3.0/ledgerline/ingest/csv_generic.py +41 -0
  16. ledgerline-0.3.0/ledgerline/ingest/ofx.py +62 -0
  17. ledgerline-0.3.0/ledgerline/ingest/profiles.py +35 -0
  18. ledgerline-0.3.0/ledgerline/ingest/types.py +24 -0
  19. ledgerline-0.3.0/ledgerline/llm.py +23 -0
  20. ledgerline-0.3.0/ledgerline/mcp_server.py +729 -0
  21. ledgerline-0.3.0/ledgerline/migrations/001_init.sql +67 -0
  22. ledgerline-0.3.0/ledgerline/migrations/002_account_balances.sql +3 -0
  23. ledgerline-0.3.0/ledgerline/migrations/003_recurring_scope.sql +4 -0
  24. ledgerline-0.3.0/ledgerline/migrations/004_sync_state.sql +4 -0
  25. ledgerline-0.3.0/ledgerline/migrations/005_account_context.sql +7 -0
  26. ledgerline-0.3.0/ledgerline/migrations/006_account_analysis_treatment.sql +4 -0
  27. ledgerline-0.3.0/ledgerline/money.py +30 -0
  28. ledgerline-0.3.0/ledgerline/normalize.py +84 -0
  29. ledgerline-0.3.0/ledgerline/query.py +294 -0
  30. ledgerline-0.3.0/ledgerline/recurring.py +209 -0
  31. ledgerline-0.3.0/pyproject.toml +59 -0
  32. ledgerline-0.3.0/tests/__init__.py +0 -0
  33. ledgerline-0.3.0/tests/conftest.py +19 -0
  34. ledgerline-0.3.0/tests/fixtures/generic_visa_jan.csv +5 -0
  35. ledgerline-0.3.0/tests/fixtures/malformed.csv +6 -0
  36. ledgerline-0.3.0/tests/fixtures/overlap_1.csv +4 -0
  37. ledgerline-0.3.0/tests/fixtures/overlap_2.csv +4 -0
  38. ledgerline-0.3.0/tests/fixtures/sample.ofx +56 -0
  39. ledgerline-0.3.0/tests/fixtures/us_checking_jan.csv +9 -0
  40. ledgerline-0.3.0/tests/test_accounts.py +41 -0
  41. ledgerline-0.3.0/tests/test_categorize.py +164 -0
  42. ledgerline-0.3.0/tests/test_cli.py +87 -0
  43. ledgerline-0.3.0/tests/test_demo.py +92 -0
  44. ledgerline-0.3.0/tests/test_ingest.py +118 -0
  45. ledgerline-0.3.0/tests/test_mcp_server.py +209 -0
  46. ledgerline-0.3.0/tests/test_money.py +33 -0
  47. ledgerline-0.3.0/tests/test_normalize.py +40 -0
  48. ledgerline-0.3.0/tests/test_query.py +175 -0
  49. ledgerline-0.3.0/tests/test_recurring.py +181 -0
  50. ledgerline-0.3.0/tests/test_security.py +81 -0
  51. ledgerline-0.3.0/tests/test_sync.py +246 -0
  52. ledgerline-0.3.0/uv.lock +993 -0
@@ -0,0 +1,17 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: astral-sh/setup-uv@v5
14
+ with:
15
+ enable-cache: true
16
+ - run: uv sync --frozen
17
+ - run: uv run pytest -q
@@ -0,0 +1,31 @@
1
+ # Financial data never enters git (Security Invariant 1)
2
+ data/
3
+ *.db
4
+ *.db-journal
5
+ *.db-wal
6
+ *.db-shm
7
+ *.csv
8
+ *.ofx
9
+ *.qfx
10
+ .env
11
+
12
+ # Analysis notebooks carry real query output — never committable
13
+ analysis/
14
+ *.ipynb
15
+
16
+ # Test fixtures are the one sanctioned exception: fabricated data only
17
+ !tests/fixtures/*.csv
18
+ !tests/fixtures/*.ofx
19
+ !tests/fixtures/*.qfx
20
+
21
+ # Local tool state (per-machine permission grants, never publishable)
22
+ .claude/
23
+
24
+ # Python
25
+ __pycache__/
26
+ *.pyc
27
+ .venv/
28
+ *.egg-info/
29
+ .pytest_cache/
30
+ dist/
31
+ build/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jerald Yuan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,290 @@
1
+ Metadata-Version: 2.4
2
+ Name: ledgerline
3
+ Version: 0.3.0
4
+ Summary: Local-first personal finance pipeline: ingest bank exports, categorize, detect recurring payments, and expose read-only finance tools to AI agents over MCP
5
+ Project-URL: Homepage, https://github.com/jeraldhu-yuan/ledgerline
6
+ Project-URL: Repository, https://github.com/jeraldhu-yuan/ledgerline
7
+ Project-URL: Issues, https://github.com/jeraldhu-yuan/ledgerline/issues
8
+ Author: Jerald Yuan
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Jerald Yuan
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: budgeting,local-first,mcp,model-context-protocol,personal-finance,simplefin,sqlite,transactions
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Environment :: Console
34
+ Classifier: Intended Audience :: End Users/Desktop
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: OS Independent
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Office/Business :: Financial
42
+ Classifier: Topic :: Office/Business :: Financial :: Accounting
43
+ Requires-Python: >=3.11
44
+ Requires-Dist: anthropic>=0.40
45
+ Requires-Dist: click>=8.1
46
+ Requires-Dist: mcp[cli]>=1.0
47
+ Requires-Dist: rich>=13.0
48
+ Description-Content-Type: text/markdown
49
+
50
+ # ledgerline
51
+
52
+ [![CI](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml/badge.svg)](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml)
53
+
54
+ Give AI agents read-only access to your finances without giving anyone your
55
+ data: one SQLite file on your machine, no cloud, exact integer-cent answers
56
+ over MCP.
57
+
58
+ Everything runs locally. Bank access (via SimpleFIN Bridge) is read-only by
59
+ construction — Ledgerline never sees your banking credentials and cannot
60
+ move money. Account numbers are dropped at parse time, so the model can
61
+ never see what the database never contains. Delete the one `.db` file and
62
+ every trace is gone.
63
+
64
+ <!-- demo.gif goes here
65
+
66
+ Record this 60-second flow (terminal at ~100x30, then your MCP client):
67
+ 1. (0:00) In an empty directory: `uvx --from ledgerline ledgerline demo`
68
+ — let the seed summary and "Try these next" block render.
69
+ 2. (0:10) `uvx --from ledgerline ledgerline summary`
70
+ — pause ~3s on the category table.
71
+ 3. (0:20) `uvx --from ledgerline ledgerline upcoming`
72
+ — pause ~3s on the expected-charges table.
73
+ 4. (0:30) Paste the `claude mcp add ...` one-liner printed by the demo.
74
+ 5. (0:35) Open Claude Code and ask: "What recurring charges are coming up,
75
+ and can my checking balance cover them?" — show the answer citing
76
+ exact amounts from the upcoming_payments tool.
77
+ 6. (0:55) End on the answer.
78
+ -->
79
+
80
+ ## Try it in 90 seconds
81
+
82
+ No clone, no signup, no API key, no real financial data — the demo seeds six
83
+ months of clearly fabricated transactions so you can evaluate everything
84
+ before connecting anything. [uv](https://docs.astral.sh/uv/getting-started/installation/)
85
+ is the only prerequisite.
86
+
87
+ ```sh
88
+ uvx --from ledgerline ledgerline demo
89
+ uvx --from ledgerline ledgerline summary # income/outflow by category
90
+ uvx --from ledgerline ledgerline upcoming # expected charges, next 30 days
91
+ ```
92
+
93
+ `demo` prints copy-paste one-liners that connect the MCP server to Codex or
94
+ Claude Code; then ask things like "What recurring charges are coming up?" or
95
+ "Why was last month so expensive?". When you're done evaluating, delete
96
+ `data/ledgerline.db` and start fresh with real data below. (`demo` refuses
97
+ to write into a database that already has transactions.)
98
+
99
+ ## Quick start with real data
100
+
101
+ ```sh
102
+ git clone https://github.com/jeraldhu-yuan/ledgerline
103
+ cd ledgerline
104
+ uv sync
105
+ ```
106
+
107
+ Then get transactions in. Both paths work, and they can be mixed freely —
108
+ the importer deduplicates.
109
+
110
+ **Bank sync.** Sign up at <https://bridge.simplefin.org> (SimpleFIN Bridge,
111
+ a small paid service that turns your bank logins into read-only transaction
112
+ feeds — Ledgerline never sees your banking credentials), link your bank(s),
113
+ and create a new app on your account page to get a one-time setup token.
114
+ Then:
115
+
116
+ ```sh
117
+ uv run ledgerline connect # paste the setup token when prompted
118
+ uv run ledgerline sync # pull your transactions
119
+ ```
120
+
121
+ `connect` stores the resulting access URL owner-only in
122
+ `~/.config/ledgerline/simplefin.env`. The first `sync` prompts to map each
123
+ bank account to a local label; re-running is always safe, and a stale
124
+ database catches up in provider-friendly 45-day windows. If an institution
125
+ is missing from SimpleFIN's catalog, that account just stays on file
126
+ import — mixing both paths is a supported steady state.
127
+
128
+ **File import.** Download a CSV/OFX/QFX export from your bank's website:
129
+
130
+ ```sh
131
+ uv run ledgerline ingest export.csv --account "Checking"
132
+ ```
133
+
134
+ The database lives at `data/ledgerline.db` (gitignored); override with
135
+ `--db` or `LEDGERLINE_DB`. No API key is needed for any of this — the two
136
+ optional embedded LLM commands (`categorize`, `ask`) read
137
+ `ANTHROPIC_API_KEY` from the environment, and everything else runs keyless.
138
+
139
+ ## AI agent access (recommended)
140
+
141
+ Ledgerline runs as a local stdio MCP server exposing read-only tools: data
142
+ freshness, transaction search, spending summaries, period comparisons,
143
+ account balances, upcoming payments, and constrained SQL. The contract is
144
+ deliberately small and uniform — exact integer cents, totals always per
145
+ currency and never combined, and limitations (staleness, uncategorized
146
+ spend, unknown account purpose) reported as data rather than prescriptive
147
+ workflow text. The reasoning is the client model's job; the server's job is
148
+ exact, truthful primitives.
149
+
150
+ The one cache-writing tool, `refresh_data`, pulls from SimpleFIN at most
151
+ once an hour. A refresh that hits provider errors is recorded as an attempt
152
+ but not a success, and `data_status` discloses the difference.
153
+
154
+ ```sh
155
+ # Codex (user scope)
156
+ codex mcp add ledgerline --env LEDGERLINE_DB=/absolute/path/to/ledgerline.db -- \
157
+ uvx --from ledgerline ledgerline-mcp
158
+
159
+ # Claude Code (user scope)
160
+ claude mcp add --scope user --transport stdio \
161
+ --env LEDGERLINE_DB=/absolute/path/to/ledgerline.db ledgerline -- \
162
+ uvx --from ledgerline ledgerline-mcp
163
+ ```
164
+
165
+ (From a repo checkout, point the command at
166
+ `/path/to/ledgerline/.venv/bin/ledgerline-mcp` instead of `uvx`.) Restart
167
+ the client, then ask things like "How much did I spend on dining in
168
+ January?" or "What recurring charges are coming up?"
169
+
170
+ ## Usage
171
+
172
+ ```sh
173
+ # Monthly summary: income/outflow by category, top merchants, deltas
174
+ uv run ledgerline summary --month 2026-06
175
+
176
+ # Resolve uncached merchants with ONE batched LLM call
177
+ uv run ledgerline categorize
178
+
179
+ # Confirm/correct categories; corrections apply retroactively
180
+ uv run ledgerline review
181
+
182
+ # Recurring payments
183
+ uv run ledgerline recurring detect
184
+ uv run ledgerline recurring add --label "Course tuition installment" \
185
+ --amount 850.00 --cadence monthly --day 21
186
+ uv run ledgerline upcoming --days 30
187
+
188
+ # Embedded Q&A for use without an MCP client (needs ANTHROPIC_API_KEY)
189
+ uv run ledgerline ask "why was June so expensive?"
190
+
191
+ # CSV dump for analysis elsewhere
192
+ uv run ledgerline export --month 2026-06 --out june.csv
193
+
194
+ # Durable account context for agents and reports
195
+ uv run ledgerline accounts set-context "Chequing" --purpose mixed \
196
+ --entity "Northwind Consulting" --business-use-percent 70 \
197
+ --context "Business income plus personal spending"
198
+ ```
199
+
200
+ Account context (`personal`/`business`/`mixed`/`unknown`, owning entity,
201
+ business-use percentage, free-form note) persists in SQLite and rides along
202
+ on every MCP result, so agents segment cash flow before judging it.
203
+
204
+ ## Contributing a bank profile
205
+
206
+ If your bank's CSV doesn't auto-detect, the fix is a ~10-line pull request:
207
+ add one dict to `PROFILES` in
208
+ [`ledgerline/ingest/profiles.py`](ledgerline/ingest/profiles.py). OFX/QFX
209
+ needs no profile.
210
+
211
+ ```python
212
+ "us_checking": {
213
+ "columns": {"date": "Posting Date", "amount": "Amount", "description": "Description"},
214
+ "date_format": "%m/%d/%Y",
215
+ "sign": 1, # -1 if the export shows charges as positive
216
+ "skip_rows": 0,
217
+ "external_id_column": None, # column with a bank-side unique id, if any
218
+ },
219
+ ```
220
+
221
+ Include a small fabricated CSV fixture (invented merchants, never real
222
+ account data) in `tests/fixtures/` and a test asserting it ingests with the
223
+ right sign convention — see `test_sign_convention_profile` in
224
+ [`tests/test_ingest.py`](tests/test_ingest.py) for the pattern.
225
+
226
+ ## Idempotency
227
+
228
+ Re-importing a file, overlapping export ranges, and sync + file import of the
229
+ same period all produce zero duplicates (tested in `tests/test_ingest.py` and
230
+ `tests/test_sync.py`).
231
+
232
+ **Design note — one deliberate deviation from the spec:** the spec folds
233
+ FITID into `dedupe_hash` when present. Done literally, that would *create*
234
+ duplicates in mixed mode: a CSV row (no FITID) and a SimpleFIN row (with id)
235
+ for the same transaction would hash differently. Instead:
236
+
237
+ - `dedupe_hash = sha256(account_id | posted_date | amount_cents | merchant_raw | occurrence_index)`
238
+ with occurrence counting — the Nth identical row in a batch is a duplicate
239
+ only if the DB already holds more than N such rows. Two genuinely distinct
240
+ same-day, same-amount, same-merchant transactions survive because they
241
+ arrive in the same export with occurrence indexes 0 and 1.
242
+ - Bank-side ids (OFX FITID, SimpleFIN txn id) are stored in `external_id`
243
+ with a unique per-account index, short-circuit re-imports, and are
244
+ backfilled onto rows that originally arrived without one.
245
+
246
+ This satisfies every acceptance test, including both orders of mixed-mode.
247
+ Caveat: cross-source dedupe matches on the raw description, so it works when
248
+ both sources export the same description string (typical for OFX/SimpleFIN
249
+ from the same institution).
250
+
251
+ ## Security invariants
252
+
253
+ - `data/`, `*.db`, `*.csv`, `*.ofx`, `*.qfx`, `.env`, `analysis/`, and
254
+ `*.ipynb` gitignored from the first commit; test fixtures and `demo`
255
+ data are fabricated only.
256
+ - Account numbers are never parsed: the OFX reader and SimpleFIN connector
257
+ drop `ACCTID`/`BANKID`-class fields at parse time. Only short labels
258
+ ("US Checking") identify accounts. Asserted in `tests/test_security.py`.
259
+ - The model gets full transaction detail through `run_sql` — by design. What
260
+ it can never see is what the DB never contains: account numbers,
261
+ credentials, raw export files.
262
+ - `run_sql`: read-only connection (`mode=ro` URI), single-statement
263
+ SELECT/WITH only, keyword denylist, SQLite authorizer denying everything
264
+ but reads, 200-row cap, 5-second time limit, statement/result size limits.
265
+ Literals and comments are stripped before the keyword scan (a merchant
266
+ named "UPDATE" is not a false positive); the authorizer and read-only mode
267
+ are the real guards. Tested with hostile inputs.
268
+ - SimpleFIN access URL from `SIMPLEFIN_ACCESS_URL` or a `0600` config file
269
+ only — never the repo, the DB, or the LLM context. `https` is required,
270
+ HTTP redirects are refused (credentials are never replayed to another
271
+ host), and loose file permissions produce a warning.
272
+ - New database files are created owner-only (`0600`).
273
+ - `ANTHROPIC_API_KEY` from env only; LLM steps fail loudly without it,
274
+ everything else runs keyless.
275
+
276
+ ## Tests
277
+
278
+ ```sh
279
+ uv run pytest
280
+ ```
281
+
282
+ The suite covers the acceptance checklist: mixed-mode dedupe in both
283
+ orders, quarantine of malformed rows, integer-cents math, per-currency
284
+ reporting, `run_sql` hardening against hostile inputs, recurring detection
285
+ with gap tolerance, the MCP tools, the demo seeder, and the security
286
+ invariants above.
287
+
288
+ ## License
289
+
290
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,241 @@
1
+ # ledgerline
2
+
3
+ [![CI](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml/badge.svg)](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml)
4
+
5
+ Give AI agents read-only access to your finances without giving anyone your
6
+ data: one SQLite file on your machine, no cloud, exact integer-cent answers
7
+ over MCP.
8
+
9
+ Everything runs locally. Bank access (via SimpleFIN Bridge) is read-only by
10
+ construction — Ledgerline never sees your banking credentials and cannot
11
+ move money. Account numbers are dropped at parse time, so the model can
12
+ never see what the database never contains. Delete the one `.db` file and
13
+ every trace is gone.
14
+
15
+ <!-- demo.gif goes here
16
+
17
+ Record this 60-second flow (terminal at ~100x30, then your MCP client):
18
+ 1. (0:00) In an empty directory: `uvx --from ledgerline ledgerline demo`
19
+ — let the seed summary and "Try these next" block render.
20
+ 2. (0:10) `uvx --from ledgerline ledgerline summary`
21
+ — pause ~3s on the category table.
22
+ 3. (0:20) `uvx --from ledgerline ledgerline upcoming`
23
+ — pause ~3s on the expected-charges table.
24
+ 4. (0:30) Paste the `claude mcp add ...` one-liner printed by the demo.
25
+ 5. (0:35) Open Claude Code and ask: "What recurring charges are coming up,
26
+ and can my checking balance cover them?" — show the answer citing
27
+ exact amounts from the upcoming_payments tool.
28
+ 6. (0:55) End on the answer.
29
+ -->
30
+
31
+ ## Try it in 90 seconds
32
+
33
+ No clone, no signup, no API key, no real financial data — the demo seeds six
34
+ months of clearly fabricated transactions so you can evaluate everything
35
+ before connecting anything. [uv](https://docs.astral.sh/uv/getting-started/installation/)
36
+ is the only prerequisite.
37
+
38
+ ```sh
39
+ uvx --from ledgerline ledgerline demo
40
+ uvx --from ledgerline ledgerline summary # income/outflow by category
41
+ uvx --from ledgerline ledgerline upcoming # expected charges, next 30 days
42
+ ```
43
+
44
+ `demo` prints copy-paste one-liners that connect the MCP server to Codex or
45
+ Claude Code; then ask things like "What recurring charges are coming up?" or
46
+ "Why was last month so expensive?". When you're done evaluating, delete
47
+ `data/ledgerline.db` and start fresh with real data below. (`demo` refuses
48
+ to write into a database that already has transactions.)
49
+
50
+ ## Quick start with real data
51
+
52
+ ```sh
53
+ git clone https://github.com/jeraldhu-yuan/ledgerline
54
+ cd ledgerline
55
+ uv sync
56
+ ```
57
+
58
+ Then get transactions in. Both paths work, and they can be mixed freely —
59
+ the importer deduplicates.
60
+
61
+ **Bank sync.** Sign up at <https://bridge.simplefin.org> (SimpleFIN Bridge,
62
+ a small paid service that turns your bank logins into read-only transaction
63
+ feeds — Ledgerline never sees your banking credentials), link your bank(s),
64
+ and create a new app on your account page to get a one-time setup token.
65
+ Then:
66
+
67
+ ```sh
68
+ uv run ledgerline connect # paste the setup token when prompted
69
+ uv run ledgerline sync # pull your transactions
70
+ ```
71
+
72
+ `connect` stores the resulting access URL owner-only in
73
+ `~/.config/ledgerline/simplefin.env`. The first `sync` prompts to map each
74
+ bank account to a local label; re-running is always safe, and a stale
75
+ database catches up in provider-friendly 45-day windows. If an institution
76
+ is missing from SimpleFIN's catalog, that account just stays on file
77
+ import — mixing both paths is a supported steady state.
78
+
79
+ **File import.** Download a CSV/OFX/QFX export from your bank's website:
80
+
81
+ ```sh
82
+ uv run ledgerline ingest export.csv --account "Checking"
83
+ ```
84
+
85
+ The database lives at `data/ledgerline.db` (gitignored); override with
86
+ `--db` or `LEDGERLINE_DB`. No API key is needed for any of this — the two
87
+ optional embedded LLM commands (`categorize`, `ask`) read
88
+ `ANTHROPIC_API_KEY` from the environment, and everything else runs keyless.
89
+
90
+ ## AI agent access (recommended)
91
+
92
+ Ledgerline runs as a local stdio MCP server exposing read-only tools: data
93
+ freshness, transaction search, spending summaries, period comparisons,
94
+ account balances, upcoming payments, and constrained SQL. The contract is
95
+ deliberately small and uniform — exact integer cents, totals always per
96
+ currency and never combined, and limitations (staleness, uncategorized
97
+ spend, unknown account purpose) reported as data rather than prescriptive
98
+ workflow text. The reasoning is the client model's job; the server's job is
99
+ exact, truthful primitives.
100
+
101
+ The one cache-writing tool, `refresh_data`, pulls from SimpleFIN at most
102
+ once an hour. A refresh that hits provider errors is recorded as an attempt
103
+ but not a success, and `data_status` discloses the difference.
104
+
105
+ ```sh
106
+ # Codex (user scope)
107
+ codex mcp add ledgerline --env LEDGERLINE_DB=/absolute/path/to/ledgerline.db -- \
108
+ uvx --from ledgerline ledgerline-mcp
109
+
110
+ # Claude Code (user scope)
111
+ claude mcp add --scope user --transport stdio \
112
+ --env LEDGERLINE_DB=/absolute/path/to/ledgerline.db ledgerline -- \
113
+ uvx --from ledgerline ledgerline-mcp
114
+ ```
115
+
116
+ (From a repo checkout, point the command at
117
+ `/path/to/ledgerline/.venv/bin/ledgerline-mcp` instead of `uvx`.) Restart
118
+ the client, then ask things like "How much did I spend on dining in
119
+ January?" or "What recurring charges are coming up?"
120
+
121
+ ## Usage
122
+
123
+ ```sh
124
+ # Monthly summary: income/outflow by category, top merchants, deltas
125
+ uv run ledgerline summary --month 2026-06
126
+
127
+ # Resolve uncached merchants with ONE batched LLM call
128
+ uv run ledgerline categorize
129
+
130
+ # Confirm/correct categories; corrections apply retroactively
131
+ uv run ledgerline review
132
+
133
+ # Recurring payments
134
+ uv run ledgerline recurring detect
135
+ uv run ledgerline recurring add --label "Course tuition installment" \
136
+ --amount 850.00 --cadence monthly --day 21
137
+ uv run ledgerline upcoming --days 30
138
+
139
+ # Embedded Q&A for use without an MCP client (needs ANTHROPIC_API_KEY)
140
+ uv run ledgerline ask "why was June so expensive?"
141
+
142
+ # CSV dump for analysis elsewhere
143
+ uv run ledgerline export --month 2026-06 --out june.csv
144
+
145
+ # Durable account context for agents and reports
146
+ uv run ledgerline accounts set-context "Chequing" --purpose mixed \
147
+ --entity "Northwind Consulting" --business-use-percent 70 \
148
+ --context "Business income plus personal spending"
149
+ ```
150
+
151
+ Account context (`personal`/`business`/`mixed`/`unknown`, owning entity,
152
+ business-use percentage, free-form note) persists in SQLite and rides along
153
+ on every MCP result, so agents segment cash flow before judging it.
154
+
155
+ ## Contributing a bank profile
156
+
157
+ If your bank's CSV doesn't auto-detect, the fix is a ~10-line pull request:
158
+ add one dict to `PROFILES` in
159
+ [`ledgerline/ingest/profiles.py`](ledgerline/ingest/profiles.py). OFX/QFX
160
+ needs no profile.
161
+
162
+ ```python
163
+ "us_checking": {
164
+ "columns": {"date": "Posting Date", "amount": "Amount", "description": "Description"},
165
+ "date_format": "%m/%d/%Y",
166
+ "sign": 1, # -1 if the export shows charges as positive
167
+ "skip_rows": 0,
168
+ "external_id_column": None, # column with a bank-side unique id, if any
169
+ },
170
+ ```
171
+
172
+ Include a small fabricated CSV fixture (invented merchants, never real
173
+ account data) in `tests/fixtures/` and a test asserting it ingests with the
174
+ right sign convention — see `test_sign_convention_profile` in
175
+ [`tests/test_ingest.py`](tests/test_ingest.py) for the pattern.
176
+
177
+ ## Idempotency
178
+
179
+ Re-importing a file, overlapping export ranges, and sync + file import of the
180
+ same period all produce zero duplicates (tested in `tests/test_ingest.py` and
181
+ `tests/test_sync.py`).
182
+
183
+ **Design note — one deliberate deviation from the spec:** the spec folds
184
+ FITID into `dedupe_hash` when present. Done literally, that would *create*
185
+ duplicates in mixed mode: a CSV row (no FITID) and a SimpleFIN row (with id)
186
+ for the same transaction would hash differently. Instead:
187
+
188
+ - `dedupe_hash = sha256(account_id | posted_date | amount_cents | merchant_raw | occurrence_index)`
189
+ with occurrence counting — the Nth identical row in a batch is a duplicate
190
+ only if the DB already holds more than N such rows. Two genuinely distinct
191
+ same-day, same-amount, same-merchant transactions survive because they
192
+ arrive in the same export with occurrence indexes 0 and 1.
193
+ - Bank-side ids (OFX FITID, SimpleFIN txn id) are stored in `external_id`
194
+ with a unique per-account index, short-circuit re-imports, and are
195
+ backfilled onto rows that originally arrived without one.
196
+
197
+ This satisfies every acceptance test, including both orders of mixed-mode.
198
+ Caveat: cross-source dedupe matches on the raw description, so it works when
199
+ both sources export the same description string (typical for OFX/SimpleFIN
200
+ from the same institution).
201
+
202
+ ## Security invariants
203
+
204
+ - `data/`, `*.db`, `*.csv`, `*.ofx`, `*.qfx`, `.env`, `analysis/`, and
205
+ `*.ipynb` gitignored from the first commit; test fixtures and `demo`
206
+ data are fabricated only.
207
+ - Account numbers are never parsed: the OFX reader and SimpleFIN connector
208
+ drop `ACCTID`/`BANKID`-class fields at parse time. Only short labels
209
+ ("US Checking") identify accounts. Asserted in `tests/test_security.py`.
210
+ - The model gets full transaction detail through `run_sql` — by design. What
211
+ it can never see is what the DB never contains: account numbers,
212
+ credentials, raw export files.
213
+ - `run_sql`: read-only connection (`mode=ro` URI), single-statement
214
+ SELECT/WITH only, keyword denylist, SQLite authorizer denying everything
215
+ but reads, 200-row cap, 5-second time limit, statement/result size limits.
216
+ Literals and comments are stripped before the keyword scan (a merchant
217
+ named "UPDATE" is not a false positive); the authorizer and read-only mode
218
+ are the real guards. Tested with hostile inputs.
219
+ - SimpleFIN access URL from `SIMPLEFIN_ACCESS_URL` or a `0600` config file
220
+ only — never the repo, the DB, or the LLM context. `https` is required,
221
+ HTTP redirects are refused (credentials are never replayed to another
222
+ host), and loose file permissions produce a warning.
223
+ - New database files are created owner-only (`0600`).
224
+ - `ANTHROPIC_API_KEY` from env only; LLM steps fail loudly without it,
225
+ everything else runs keyless.
226
+
227
+ ## Tests
228
+
229
+ ```sh
230
+ uv run pytest
231
+ ```
232
+
233
+ The suite covers the acceptance checklist: mixed-mode dedupe in both
234
+ orders, quarantine of malformed rows, integer-cents math, per-currency
235
+ reporting, `run_sql` hardening against hostile inputs, recurring detection
236
+ with gap tolerance, the MCP tools, the demo seeder, and the security
237
+ invariants above.
238
+
239
+ ## License
240
+
241
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,7 @@
1
+ """ledgerline — local-first personal finance tracker."""
2
+
3
+ __version__ = "0.3.0"
4
+
5
+
6
class LedgerlineError(Exception):
    """Error whose message is meant to be shown directly to the user.

    Intended handling: print the message and exit with a nonzero status,
    without displaying a traceback.
    """
@@ -0,0 +1,60 @@
1
+ """Durable account metadata used to interpret financial activity."""
2
+
3
+ import sqlite3
4
+ from typing import Any
5
+
6
+ PURPOSES = ("personal", "business", "mixed", "unknown")
7
+ ANALYSIS_TREATMENTS = ("include", "monitor_only", "exclude")
8
+
9
+
10
+ def set_context(
11
+ conn: sqlite3.Connection,
12
+ account_name: str,
13
+ *,
14
+ purpose: str | None = None,
15
+ entity_name: str | None = None,
16
+ business_use_percent: int | None = None,
17
+ context_note: str | None = None,
18
+ analysis_treatment: str | None = None,
19
+ ) -> dict[str, Any]:
20
+ """Update interpretive metadata without changing bank-sourced fields."""
21
+ row = conn.execute("SELECT * FROM accounts WHERE name = ?", (account_name,)).fetchone()
22
+ if not row:
23
+ raise ValueError(f"unknown account: {account_name}")
24
+ if purpose is not None and purpose not in PURPOSES:
25
+ raise ValueError(f"purpose must be one of {', '.join(PURPOSES)}")
26
+ if business_use_percent is not None and not 0 <= business_use_percent <= 100:
27
+ raise ValueError("business_use_percent must be between 0 and 100")
28
+ if analysis_treatment is not None and analysis_treatment not in ANALYSIS_TREATMENTS:
29
+ raise ValueError(
30
+ f"analysis_treatment must be one of {', '.join(ANALYSIS_TREATMENTS)}"
31
+ )
32
+
33
+ updates: dict[str, object | None] = {}
34
+ if purpose is not None:
35
+ updates["purpose"] = purpose
36
+ if business_use_percent is None:
37
+ if purpose == "personal":
38
+ updates["business_use_percent"] = 0
39
+ elif purpose == "business":
40
+ updates["business_use_percent"] = 100
41
+ elif purpose == "unknown":
42
+ updates["business_use_percent"] = None
43
+ if entity_name is not None:
44
+ updates["entity_name"] = entity_name.strip() or None
45
+ if business_use_percent is not None:
46
+ updates["business_use_percent"] = business_use_percent
47
+ if context_note is not None:
48
+ updates["context_note"] = context_note.strip() or None
49
+ if analysis_treatment is not None:
50
+ updates["analysis_treatment"] = analysis_treatment
51
+ if not updates:
52
+ raise ValueError("provide at least one account metadata field to update")
53
+
54
+ assignments = ", ".join(f"{column} = ?" for column in updates)
55
+ conn.execute(
56
+ f"UPDATE accounts SET {assignments} WHERE id = ?",
57
+ [*updates.values(), row["id"]],
58
+ )
59
+ conn.commit()
60
+ return dict(conn.execute("SELECT * FROM accounts WHERE id = ?", (row["id"],)).fetchone())