ledgerline 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ledgerline-0.3.0/.github/workflows/ci.yml +17 -0
- ledgerline-0.3.0/.gitignore +31 -0
- ledgerline-0.3.0/LICENSE +21 -0
- ledgerline-0.3.0/PKG-INFO +290 -0
- ledgerline-0.3.0/README.md +241 -0
- ledgerline-0.3.0/ledgerline/__init__.py +7 -0
- ledgerline-0.3.0/ledgerline/accounts.py +60 -0
- ledgerline-0.3.0/ledgerline/categorize.py +195 -0
- ledgerline-0.3.0/ledgerline/cli.py +457 -0
- ledgerline-0.3.0/ledgerline/connectors/__init__.py +0 -0
- ledgerline-0.3.0/ledgerline/connectors/simplefin.py +314 -0
- ledgerline-0.3.0/ledgerline/db.py +64 -0
- ledgerline-0.3.0/ledgerline/demo.py +195 -0
- ledgerline-0.3.0/ledgerline/ingest/__init__.py +178 -0
- ledgerline-0.3.0/ledgerline/ingest/csv_generic.py +41 -0
- ledgerline-0.3.0/ledgerline/ingest/ofx.py +62 -0
- ledgerline-0.3.0/ledgerline/ingest/profiles.py +35 -0
- ledgerline-0.3.0/ledgerline/ingest/types.py +24 -0
- ledgerline-0.3.0/ledgerline/llm.py +23 -0
- ledgerline-0.3.0/ledgerline/mcp_server.py +729 -0
- ledgerline-0.3.0/ledgerline/migrations/001_init.sql +67 -0
- ledgerline-0.3.0/ledgerline/migrations/002_account_balances.sql +3 -0
- ledgerline-0.3.0/ledgerline/migrations/003_recurring_scope.sql +4 -0
- ledgerline-0.3.0/ledgerline/migrations/004_sync_state.sql +4 -0
- ledgerline-0.3.0/ledgerline/migrations/005_account_context.sql +7 -0
- ledgerline-0.3.0/ledgerline/migrations/006_account_analysis_treatment.sql +4 -0
- ledgerline-0.3.0/ledgerline/money.py +30 -0
- ledgerline-0.3.0/ledgerline/normalize.py +84 -0
- ledgerline-0.3.0/ledgerline/query.py +294 -0
- ledgerline-0.3.0/ledgerline/recurring.py +209 -0
- ledgerline-0.3.0/pyproject.toml +59 -0
- ledgerline-0.3.0/tests/__init__.py +0 -0
- ledgerline-0.3.0/tests/conftest.py +19 -0
- ledgerline-0.3.0/tests/fixtures/generic_visa_jan.csv +5 -0
- ledgerline-0.3.0/tests/fixtures/malformed.csv +6 -0
- ledgerline-0.3.0/tests/fixtures/overlap_1.csv +4 -0
- ledgerline-0.3.0/tests/fixtures/overlap_2.csv +4 -0
- ledgerline-0.3.0/tests/fixtures/sample.ofx +56 -0
- ledgerline-0.3.0/tests/fixtures/us_checking_jan.csv +9 -0
- ledgerline-0.3.0/tests/test_accounts.py +41 -0
- ledgerline-0.3.0/tests/test_categorize.py +164 -0
- ledgerline-0.3.0/tests/test_cli.py +87 -0
- ledgerline-0.3.0/tests/test_demo.py +92 -0
- ledgerline-0.3.0/tests/test_ingest.py +118 -0
- ledgerline-0.3.0/tests/test_mcp_server.py +209 -0
- ledgerline-0.3.0/tests/test_money.py +33 -0
- ledgerline-0.3.0/tests/test_normalize.py +40 -0
- ledgerline-0.3.0/tests/test_query.py +175 -0
- ledgerline-0.3.0/tests/test_recurring.py +181 -0
- ledgerline-0.3.0/tests/test_security.py +81 -0
- ledgerline-0.3.0/tests/test_sync.py +246 -0
- ledgerline-0.3.0/uv.lock +993 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: astral-sh/setup-uv@v5
|
|
14
|
+
with:
|
|
15
|
+
enable-cache: true
|
|
16
|
+
- run: uv sync --frozen
|
|
17
|
+
- run: uv run pytest -q
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Financial data never enters git (Security Invariant 1)
|
|
2
|
+
data/
|
|
3
|
+
*.db
|
|
4
|
+
*.db-journal
|
|
5
|
+
*.db-wal
|
|
6
|
+
*.db-shm
|
|
7
|
+
*.csv
|
|
8
|
+
*.ofx
|
|
9
|
+
*.qfx
|
|
10
|
+
.env
|
|
11
|
+
|
|
12
|
+
# Analysis notebooks carry real query output — never committable
|
|
13
|
+
analysis/
|
|
14
|
+
*.ipynb
|
|
15
|
+
|
|
16
|
+
# Test fixtures are the one sanctioned exception: fabricated data only
|
|
17
|
+
!tests/fixtures/*.csv
|
|
18
|
+
!tests/fixtures/*.ofx
|
|
19
|
+
!tests/fixtures/*.qfx
|
|
20
|
+
|
|
21
|
+
# Local tool state (per-machine permission grants, never publishable)
|
|
22
|
+
.claude/
|
|
23
|
+
|
|
24
|
+
# Python
|
|
25
|
+
__pycache__/
|
|
26
|
+
*.pyc
|
|
27
|
+
.venv/
|
|
28
|
+
*.egg-info/
|
|
29
|
+
.pytest_cache/
|
|
30
|
+
dist/
|
|
31
|
+
build/
|
ledgerline-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jerald Yuan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ledgerline
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Local-first personal finance pipeline: ingest bank exports, categorize, detect recurring payments, and expose read-only finance tools to AI agents over MCP
|
|
5
|
+
Project-URL: Homepage, https://github.com/jeraldhu-yuan/ledgerline
|
|
6
|
+
Project-URL: Repository, https://github.com/jeraldhu-yuan/ledgerline
|
|
7
|
+
Project-URL: Issues, https://github.com/jeraldhu-yuan/ledgerline/issues
|
|
8
|
+
Author: Jerald Yuan
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Jerald Yuan
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: budgeting,local-first,mcp,model-context-protocol,personal-finance,simplefin,sqlite,transactions
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Environment :: Console
|
|
34
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
42
|
+
Classifier: Topic :: Office/Business :: Financial :: Accounting
|
|
43
|
+
Requires-Python: >=3.11
|
|
44
|
+
Requires-Dist: anthropic>=0.40
|
|
45
|
+
Requires-Dist: click>=8.1
|
|
46
|
+
Requires-Dist: mcp[cli]>=1.0
|
|
47
|
+
Requires-Dist: rich>=13.0
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
|
|
50
|
+
# ledgerline
|
|
51
|
+
|
|
52
|
+
[![CI](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml/badge.svg)](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml)
|
|
53
|
+
|
|
54
|
+
Give AI agents read-only access to your finances without giving anyone your
|
|
55
|
+
data: one SQLite file on your machine, no cloud, exact integer-cent answers
|
|
56
|
+
over MCP.
|
|
57
|
+
|
|
58
|
+
Everything runs locally. Bank access (via SimpleFIN Bridge) is read-only by
|
|
59
|
+
construction — Ledgerline never sees your banking credentials and cannot
|
|
60
|
+
move money. Account numbers are dropped at parse time, so the model can
|
|
61
|
+
never see what the database never contains. Delete the one `.db` file and
|
|
62
|
+
every trace is gone.
|
|
63
|
+
|
|
64
|
+
<!-- demo.gif goes here
|
|
65
|
+
|
|
66
|
+
Record this 60-second flow (terminal at ~100x30, then your MCP client):
|
|
67
|
+
1. (0:00) In an empty directory: `uvx --from ledgerline ledgerline demo`
|
|
68
|
+
— let the seed summary and "Try these next" block render.
|
|
69
|
+
2. (0:10) `uvx --from ledgerline ledgerline summary`
|
|
70
|
+
— pause ~3s on the category table.
|
|
71
|
+
3. (0:20) `uvx --from ledgerline ledgerline upcoming`
|
|
72
|
+
— pause ~3s on the expected-charges table.
|
|
73
|
+
4. (0:30) Paste the `claude mcp add ...` one-liner printed by the demo.
|
|
74
|
+
5. (0:35) Open Claude Code and ask: "What recurring charges are coming up,
|
|
75
|
+
and can my checking balance cover them?" — show the answer citing
|
|
76
|
+
exact amounts from the upcoming_payments tool.
|
|
77
|
+
6. (0:55) End on the answer.
|
|
78
|
+
-->
|
|
79
|
+
|
|
80
|
+
## Try it in 90 seconds
|
|
81
|
+
|
|
82
|
+
No clone, no signup, no API key, no real financial data — the demo seeds six
|
|
83
|
+
months of clearly fabricated transactions so you can evaluate everything
|
|
84
|
+
before connecting anything. [uv](https://docs.astral.sh/uv/getting-started/installation/)
|
|
85
|
+
is the only prerequisite.
|
|
86
|
+
|
|
87
|
+
```sh
|
|
88
|
+
uvx --from ledgerline ledgerline demo
|
|
89
|
+
uvx --from ledgerline ledgerline summary # income/outflow by category
|
|
90
|
+
uvx --from ledgerline ledgerline upcoming # expected charges, next 30 days
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
`demo` prints copy-paste one-liners that connect the MCP server to Codex or
|
|
94
|
+
Claude Code; then ask things like "What recurring charges are coming up?" or
|
|
95
|
+
"Why was last month so expensive?". When you're done evaluating, delete
|
|
96
|
+
`data/ledgerline.db` and start fresh with real data below. (`demo` refuses
|
|
97
|
+
to write into a database that already has transactions.)
|
|
98
|
+
|
|
99
|
+
## Quick start with real data
|
|
100
|
+
|
|
101
|
+
```sh
|
|
102
|
+
git clone https://github.com/jeraldhu-yuan/ledgerline
|
|
103
|
+
cd ledgerline
|
|
104
|
+
uv sync
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Then get transactions in. Both paths work, and they can be mixed freely —
|
|
108
|
+
the importer deduplicates.
|
|
109
|
+
|
|
110
|
+
**Bank sync.** Sign up at <https://bridge.simplefin.org> (SimpleFIN Bridge,
|
|
111
|
+
a small paid service that turns your bank logins into read-only transaction
|
|
112
|
+
feeds — Ledgerline never sees your banking credentials), link your bank(s),
|
|
113
|
+
and create a new app on your account page to get a one-time setup token.
|
|
114
|
+
Then:
|
|
115
|
+
|
|
116
|
+
```sh
|
|
117
|
+
uv run ledgerline connect # paste the setup token when prompted
|
|
118
|
+
uv run ledgerline sync # pull your transactions
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
`connect` stores the resulting access URL owner-only in
|
|
122
|
+
`~/.config/ledgerline/simplefin.env`. The first `sync` prompts to map each
|
|
123
|
+
bank account to a local label; re-running is always safe, and a stale
|
|
124
|
+
database catches up in provider-friendly 45-day windows. If an institution
|
|
125
|
+
is missing from SimpleFIN's catalog, that account just stays on file
|
|
126
|
+
import — mixing both paths is a supported steady state.
|
|
127
|
+
|
|
128
|
+
**File import.** Download a CSV/OFX/QFX export from your bank's website:
|
|
129
|
+
|
|
130
|
+
```sh
|
|
131
|
+
uv run ledgerline ingest export.csv --account "Checking"
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
The database lives at `data/ledgerline.db` (gitignored); override with
|
|
135
|
+
`--db` or `LEDGERLINE_DB`. No API key is needed for any of this — the two
|
|
136
|
+
optional embedded LLM commands (`categorize`, `ask`) read
|
|
137
|
+
`ANTHROPIC_API_KEY` from the environment, and everything else runs keyless.
|
|
138
|
+
|
|
139
|
+
## AI agent access (recommended)
|
|
140
|
+
|
|
141
|
+
Ledgerline runs as a local stdio MCP server exposing read-only tools: data
|
|
142
|
+
freshness, transaction search, spending summaries, period comparisons,
|
|
143
|
+
account balances, upcoming payments, and constrained SQL. The contract is
|
|
144
|
+
deliberately small and uniform — exact integer cents, totals always per
|
|
145
|
+
currency and never combined, and limitations (staleness, uncategorized
|
|
146
|
+
spend, unknown account purpose) reported as data rather than prescriptive
|
|
147
|
+
workflow text. The reasoning is the client model's job; the server's job is
|
|
148
|
+
exact, truthful primitives.
|
|
149
|
+
|
|
150
|
+
The one cache-writing tool, `refresh_data`, pulls from SimpleFIN at most
|
|
151
|
+
once an hour. A refresh that hits provider errors is recorded as an attempt
|
|
152
|
+
but not a success, and `data_status` discloses the difference.
|
|
153
|
+
|
|
154
|
+
```sh
|
|
155
|
+
# Codex (user scope)
|
|
156
|
+
codex mcp add ledgerline --env LEDGERLINE_DB=/absolute/path/to/ledgerline.db -- \
|
|
157
|
+
uvx --from ledgerline ledgerline-mcp
|
|
158
|
+
|
|
159
|
+
# Claude Code (user scope)
|
|
160
|
+
claude mcp add --scope user --transport stdio \
|
|
161
|
+
--env LEDGERLINE_DB=/absolute/path/to/ledgerline.db ledgerline -- \
|
|
162
|
+
uvx --from ledgerline ledgerline-mcp
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
(From a repo checkout, point the command at
|
|
166
|
+
`/path/to/ledgerline/.venv/bin/ledgerline-mcp` instead of `uvx`.) Restart
|
|
167
|
+
the client, then ask things like "How much did I spend on dining in
|
|
168
|
+
January?" or "What recurring charges are coming up?"
|
|
169
|
+
|
|
170
|
+
## Usage
|
|
171
|
+
|
|
172
|
+
```sh
|
|
173
|
+
# Monthly summary: income/outflow by category, top merchants, deltas
|
|
174
|
+
uv run ledgerline summary --month 2026-06
|
|
175
|
+
|
|
176
|
+
# Resolve uncached merchants with ONE batched LLM call
|
|
177
|
+
uv run ledgerline categorize
|
|
178
|
+
|
|
179
|
+
# Confirm/correct categories; corrections apply retroactively
|
|
180
|
+
uv run ledgerline review
|
|
181
|
+
|
|
182
|
+
# Recurring payments
|
|
183
|
+
uv run ledgerline recurring detect
|
|
184
|
+
uv run ledgerline recurring add --label "Course tuition installment" \
|
|
185
|
+
--amount 850.00 --cadence monthly --day 21
|
|
186
|
+
uv run ledgerline upcoming --days 30
|
|
187
|
+
|
|
188
|
+
# Embedded Q&A for use without an MCP client (needs ANTHROPIC_API_KEY)
|
|
189
|
+
uv run ledgerline ask "why was June so expensive?"
|
|
190
|
+
|
|
191
|
+
# CSV dump for analysis elsewhere
|
|
192
|
+
uv run ledgerline export --month 2026-06 --out june.csv
|
|
193
|
+
|
|
194
|
+
# Durable account context for agents and reports
|
|
195
|
+
uv run ledgerline accounts set-context "Chequing" --purpose mixed \
|
|
196
|
+
--entity "Northwind Consulting" --business-use-percent 70 \
|
|
197
|
+
--context "Business income plus personal spending"
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Account context (`personal`/`business`/`mixed`/`unknown`, owning entity,
|
|
201
|
+
business-use percentage, free-form note) persists in SQLite and rides along
|
|
202
|
+
on every MCP result, so agents segment cash flow before judging it.
|
|
203
|
+
|
|
204
|
+
## Contributing a bank profile
|
|
205
|
+
|
|
206
|
+
If your bank's CSV doesn't auto-detect, the fix is a ~10-line pull request:
|
|
207
|
+
add one dict to `PROFILES` in
|
|
208
|
+
[`ledgerline/ingest/profiles.py`](ledgerline/ingest/profiles.py). OFX/QFX
|
|
209
|
+
needs no profile.
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
"us_checking": {
|
|
213
|
+
"columns": {"date": "Posting Date", "amount": "Amount", "description": "Description"},
|
|
214
|
+
"date_format": "%m/%d/%Y",
|
|
215
|
+
"sign": 1, # -1 if the export shows charges as positive
|
|
216
|
+
"skip_rows": 0,
|
|
217
|
+
"external_id_column": None, # column with a bank-side unique id, if any
|
|
218
|
+
},
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Include a small fabricated CSV fixture (invented merchants, never real
|
|
222
|
+
account data) in `tests/fixtures/` and a test asserting it ingests with the
|
|
223
|
+
right sign convention — see `test_sign_convention_profile` in
|
|
224
|
+
[`tests/test_ingest.py`](tests/test_ingest.py) for the pattern.
|
|
225
|
+
|
|
226
|
+
## Idempotency
|
|
227
|
+
|
|
228
|
+
Re-importing a file, overlapping export ranges, and sync + file import of the
|
|
229
|
+
same period all produce zero duplicates (tested in `tests/test_ingest.py` and
|
|
230
|
+
`tests/test_sync.py`).
|
|
231
|
+
|
|
232
|
+
**Design note — one deliberate deviation from the spec:** the spec folds
|
|
233
|
+
FITID into `dedupe_hash` when present. Done literally, that would *create*
|
|
234
|
+
duplicates in mixed mode: a CSV row (no FITID) and a SimpleFIN row (with id)
|
|
235
|
+
for the same transaction would hash differently. Instead:
|
|
236
|
+
|
|
237
|
+
- `dedupe_hash = sha256(account_id | posted_date | amount_cents | merchant_raw | occurrence_index)`
|
|
238
|
+
with occurrence counting — the Nth identical row in a batch is a duplicate
|
|
239
|
+
only if the DB already holds more than N such rows. Two genuinely distinct
|
|
240
|
+
same-day, same-amount, same-merchant transactions survive because they
|
|
241
|
+
arrive in the same export with occurrence indexes 0 and 1.
|
|
242
|
+
- Bank-side ids (OFX FITID, SimpleFIN txn id) are stored in `external_id`
|
|
243
|
+
with a unique per-account index, short-circuit re-imports, and are
|
|
244
|
+
backfilled onto rows that originally arrived without one.
|
|
245
|
+
|
|
246
|
+
This satisfies every acceptance test, including both orders of mixed-mode.
|
|
247
|
+
Caveat: cross-source dedupe matches on the raw description, so it works when
|
|
248
|
+
both sources export the same description string (typical for OFX/SimpleFIN
|
|
249
|
+
from the same institution).
|
|
250
|
+
|
|
251
|
+
## Security invariants
|
|
252
|
+
|
|
253
|
+
- `data/`, `*.db`, `*.csv`, `*.ofx`, `*.qfx`, `.env`, `analysis/`, and
|
|
254
|
+
`*.ipynb` are gitignored from the first commit; test fixtures and `demo`
|
|
255
|
+
data are fabricated only.
|
|
256
|
+
- Account numbers are never parsed: the OFX reader and SimpleFIN connector
|
|
257
|
+
drop `ACCTID`/`BANKID`-class fields at parse time. Only short labels
|
|
258
|
+
("US Checking") identify accounts. Asserted in `tests/test_security.py`.
|
|
259
|
+
- The model gets full transaction detail through `run_sql` — by design. What
|
|
260
|
+
it can never see is what the DB never contains: account numbers,
|
|
261
|
+
credentials, raw export files.
|
|
262
|
+
- `run_sql`: read-only connection (`mode=ro` URI), single-statement
|
|
263
|
+
SELECT/WITH only, keyword denylist, SQLite authorizer denying everything
|
|
264
|
+
but reads, 200-row cap, 5-second time limit, statement/result size limits.
|
|
265
|
+
Literals and comments are stripped before the keyword scan (a merchant
|
|
266
|
+
named "UPDATE" is not a false positive); the authorizer and read-only mode
|
|
267
|
+
are the real guards. Tested with hostile inputs.
|
|
268
|
+
- SimpleFIN access URL from `SIMPLEFIN_ACCESS_URL` or a `0600` config file
|
|
269
|
+
only — never the repo, the DB, or the LLM context. `https` is required,
|
|
270
|
+
HTTP redirects are refused (credentials are never replayed to another
|
|
271
|
+
host), and loose file permissions produce a warning.
|
|
272
|
+
- New database files are created owner-only (`0600`).
|
|
273
|
+
- `ANTHROPIC_API_KEY` from env only; LLM steps fail loudly without it,
|
|
274
|
+
everything else runs keyless.
|
|
275
|
+
|
|
276
|
+
## Tests
|
|
277
|
+
|
|
278
|
+
```sh
|
|
279
|
+
uv run pytest
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
The suite covers the acceptance checklist: mixed-mode dedupe in both
|
|
283
|
+
orders, quarantine of malformed rows, integer-cents math, per-currency
|
|
284
|
+
reporting, `run_sql` hardening against hostile inputs, recurring detection
|
|
285
|
+
with gap tolerance, the MCP tools, the demo seeder, and the security
|
|
286
|
+
invariants above.
|
|
287
|
+
|
|
288
|
+
## License
|
|
289
|
+
|
|
290
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
# ledgerline
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml/badge.svg)](https://github.com/jeraldhu-yuan/ledgerline/actions/workflows/ci.yml)
|
|
4
|
+
|
|
5
|
+
Give AI agents read-only access to your finances without giving anyone your
|
|
6
|
+
data: one SQLite file on your machine, no cloud, exact integer-cent answers
|
|
7
|
+
over MCP.
|
|
8
|
+
|
|
9
|
+
Everything runs locally. Bank access (via SimpleFIN Bridge) is read-only by
|
|
10
|
+
construction — Ledgerline never sees your banking credentials and cannot
|
|
11
|
+
move money. Account numbers are dropped at parse time, so the model can
|
|
12
|
+
never see what the database never contains. Delete the one `.db` file and
|
|
13
|
+
every trace is gone.
|
|
14
|
+
|
|
15
|
+
<!-- demo.gif goes here
|
|
16
|
+
|
|
17
|
+
Record this 60-second flow (terminal at ~100x30, then your MCP client):
|
|
18
|
+
1. (0:00) In an empty directory: `uvx --from ledgerline ledgerline demo`
|
|
19
|
+
— let the seed summary and "Try these next" block render.
|
|
20
|
+
2. (0:10) `uvx --from ledgerline ledgerline summary`
|
|
21
|
+
— pause ~3s on the category table.
|
|
22
|
+
3. (0:20) `uvx --from ledgerline ledgerline upcoming`
|
|
23
|
+
— pause ~3s on the expected-charges table.
|
|
24
|
+
4. (0:30) Paste the `claude mcp add ...` one-liner printed by the demo.
|
|
25
|
+
5. (0:35) Open Claude Code and ask: "What recurring charges are coming up,
|
|
26
|
+
and can my checking balance cover them?" — show the answer citing
|
|
27
|
+
exact amounts from the upcoming_payments tool.
|
|
28
|
+
6. (0:55) End on the answer.
|
|
29
|
+
-->
|
|
30
|
+
|
|
31
|
+
## Try it in 90 seconds
|
|
32
|
+
|
|
33
|
+
No clone, no signup, no API key, no real financial data — the demo seeds six
|
|
34
|
+
months of clearly fabricated transactions so you can evaluate everything
|
|
35
|
+
before connecting anything. [uv](https://docs.astral.sh/uv/getting-started/installation/)
|
|
36
|
+
is the only prerequisite.
|
|
37
|
+
|
|
38
|
+
```sh
|
|
39
|
+
uvx --from ledgerline ledgerline demo
|
|
40
|
+
uvx --from ledgerline ledgerline summary # income/outflow by category
|
|
41
|
+
uvx --from ledgerline ledgerline upcoming # expected charges, next 30 days
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
`demo` prints copy-paste one-liners that connect the MCP server to Codex or
|
|
45
|
+
Claude Code; then ask things like "What recurring charges are coming up?" or
|
|
46
|
+
"Why was last month so expensive?". When you're done evaluating, delete
|
|
47
|
+
`data/ledgerline.db` and start fresh with real data below. (`demo` refuses
|
|
48
|
+
to write into a database that already has transactions.)
|
|
49
|
+
|
|
50
|
+
## Quick start with real data
|
|
51
|
+
|
|
52
|
+
```sh
|
|
53
|
+
git clone https://github.com/jeraldhu-yuan/ledgerline
|
|
54
|
+
cd ledgerline
|
|
55
|
+
uv sync
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Then get transactions in. Both paths work, and they can be mixed freely —
|
|
59
|
+
the importer deduplicates.
|
|
60
|
+
|
|
61
|
+
**Bank sync.** Sign up at <https://bridge.simplefin.org> (SimpleFIN Bridge,
|
|
62
|
+
a small paid service that turns your bank logins into read-only transaction
|
|
63
|
+
feeds — Ledgerline never sees your banking credentials), link your bank(s),
|
|
64
|
+
and create a new app on your account page to get a one-time setup token.
|
|
65
|
+
Then:
|
|
66
|
+
|
|
67
|
+
```sh
|
|
68
|
+
uv run ledgerline connect # paste the setup token when prompted
|
|
69
|
+
uv run ledgerline sync # pull your transactions
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
`connect` stores the resulting access URL owner-only in
|
|
73
|
+
`~/.config/ledgerline/simplefin.env`. The first `sync` prompts to map each
|
|
74
|
+
bank account to a local label; re-running is always safe, and a stale
|
|
75
|
+
database catches up in provider-friendly 45-day windows. If an institution
|
|
76
|
+
is missing from SimpleFIN's catalog, that account just stays on file
|
|
77
|
+
import — mixing both paths is a supported steady state.
|
|
78
|
+
|
|
79
|
+
**File import.** Download a CSV/OFX/QFX export from your bank's website:
|
|
80
|
+
|
|
81
|
+
```sh
|
|
82
|
+
uv run ledgerline ingest export.csv --account "Checking"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
The database lives at `data/ledgerline.db` (gitignored); override with
|
|
86
|
+
`--db` or `LEDGERLINE_DB`. No API key is needed for any of this — the two
|
|
87
|
+
optional embedded LLM commands (`categorize`, `ask`) read
|
|
88
|
+
`ANTHROPIC_API_KEY` from the environment, and everything else runs keyless.
|
|
89
|
+
|
|
90
|
+
## AI agent access (recommended)
|
|
91
|
+
|
|
92
|
+
Ledgerline runs as a local stdio MCP server exposing read-only tools: data
|
|
93
|
+
freshness, transaction search, spending summaries, period comparisons,
|
|
94
|
+
account balances, upcoming payments, and constrained SQL. The contract is
|
|
95
|
+
deliberately small and uniform — exact integer cents, totals always per
|
|
96
|
+
currency and never combined, and limitations (staleness, uncategorized
|
|
97
|
+
spend, unknown account purpose) reported as data rather than prescriptive
|
|
98
|
+
workflow text. The reasoning is the client model's job; the server's job is
|
|
99
|
+
exact, truthful primitives.
|
|
100
|
+
|
|
101
|
+
The one cache-writing tool, `refresh_data`, pulls from SimpleFIN at most
|
|
102
|
+
once an hour. A refresh that hits provider errors is recorded as an attempt
|
|
103
|
+
but not a success, and `data_status` discloses the difference.
|
|
104
|
+
|
|
105
|
+
```sh
|
|
106
|
+
# Codex (user scope)
|
|
107
|
+
codex mcp add ledgerline --env LEDGERLINE_DB=/absolute/path/to/ledgerline.db -- \
|
|
108
|
+
uvx --from ledgerline ledgerline-mcp
|
|
109
|
+
|
|
110
|
+
# Claude Code (user scope)
|
|
111
|
+
claude mcp add --scope user --transport stdio \
|
|
112
|
+
--env LEDGERLINE_DB=/absolute/path/to/ledgerline.db ledgerline -- \
|
|
113
|
+
uvx --from ledgerline ledgerline-mcp
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
(From a repo checkout, point the command at
|
|
117
|
+
`/path/to/ledgerline/.venv/bin/ledgerline-mcp` instead of `uvx`.) Restart
|
|
118
|
+
the client, then ask things like "How much did I spend on dining in
|
|
119
|
+
January?" or "What recurring charges are coming up?"
|
|
120
|
+
|
|
121
|
+
## Usage
|
|
122
|
+
|
|
123
|
+
```sh
|
|
124
|
+
# Monthly summary: income/outflow by category, top merchants, deltas
|
|
125
|
+
uv run ledgerline summary --month 2026-06
|
|
126
|
+
|
|
127
|
+
# Resolve uncached merchants with ONE batched LLM call
|
|
128
|
+
uv run ledgerline categorize
|
|
129
|
+
|
|
130
|
+
# Confirm/correct categories; corrections apply retroactively
|
|
131
|
+
uv run ledgerline review
|
|
132
|
+
|
|
133
|
+
# Recurring payments
|
|
134
|
+
uv run ledgerline recurring detect
|
|
135
|
+
uv run ledgerline recurring add --label "Course tuition installment" \
|
|
136
|
+
--amount 850.00 --cadence monthly --day 21
|
|
137
|
+
uv run ledgerline upcoming --days 30
|
|
138
|
+
|
|
139
|
+
# Embedded Q&A for use without an MCP client (needs ANTHROPIC_API_KEY)
|
|
140
|
+
uv run ledgerline ask "why was June so expensive?"
|
|
141
|
+
|
|
142
|
+
# CSV dump for analysis elsewhere
|
|
143
|
+
uv run ledgerline export --month 2026-06 --out june.csv
|
|
144
|
+
|
|
145
|
+
# Durable account context for agents and reports
|
|
146
|
+
uv run ledgerline accounts set-context "Chequing" --purpose mixed \
|
|
147
|
+
--entity "Northwind Consulting" --business-use-percent 70 \
|
|
148
|
+
--context "Business income plus personal spending"
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Account context (`personal`/`business`/`mixed`/`unknown`, owning entity,
|
|
152
|
+
business-use percentage, free-form note) persists in SQLite and rides along
|
|
153
|
+
on every MCP result, so agents segment cash flow before judging it.
|
|
154
|
+
|
|
155
|
+
## Contributing a bank profile
|
|
156
|
+
|
|
157
|
+
If your bank's CSV doesn't auto-detect, the fix is a ~10-line pull request:
|
|
158
|
+
add one dict to `PROFILES` in
|
|
159
|
+
[`ledgerline/ingest/profiles.py`](ledgerline/ingest/profiles.py). OFX/QFX
|
|
160
|
+
needs no profile.
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
"us_checking": {
|
|
164
|
+
"columns": {"date": "Posting Date", "amount": "Amount", "description": "Description"},
|
|
165
|
+
"date_format": "%m/%d/%Y",
|
|
166
|
+
"sign": 1, # -1 if the export shows charges as positive
|
|
167
|
+
"skip_rows": 0,
|
|
168
|
+
"external_id_column": None, # column with a bank-side unique id, if any
|
|
169
|
+
},
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Include a small fabricated CSV fixture (invented merchants, never real
|
|
173
|
+
account data) in `tests/fixtures/` and a test asserting it ingests with the
|
|
174
|
+
right sign convention — see `test_sign_convention_profile` in
|
|
175
|
+
[`tests/test_ingest.py`](tests/test_ingest.py) for the pattern.
|
|
176
|
+
|
|
177
|
+
## Idempotency
|
|
178
|
+
|
|
179
|
+
Re-importing a file, overlapping export ranges, and sync + file import of the
|
|
180
|
+
same period all produce zero duplicates (tested in `tests/test_ingest.py` and
|
|
181
|
+
`tests/test_sync.py`).
|
|
182
|
+
|
|
183
|
+
**Design note — one deliberate deviation from the spec:** the spec folds
|
|
184
|
+
FITID into `dedupe_hash` when present. Done literally, that would *create*
|
|
185
|
+
duplicates in mixed mode: a CSV row (no FITID) and a SimpleFIN row (with id)
|
|
186
|
+
for the same transaction would hash differently. Instead:
|
|
187
|
+
|
|
188
|
+
- `dedupe_hash = sha256(account_id | posted_date | amount_cents | merchant_raw | occurrence_index)`
|
|
189
|
+
with occurrence counting — the Nth identical row in a batch is a duplicate
|
|
190
|
+
only if the DB already holds more than N such rows. Two genuinely distinct
|
|
191
|
+
same-day, same-amount, same-merchant transactions survive because they
|
|
192
|
+
arrive in the same export with occurrence indexes 0 and 1.
|
|
193
|
+
- Bank-side ids (OFX FITID, SimpleFIN txn id) are stored in `external_id`
|
|
194
|
+
with a unique per-account index, short-circuit re-imports, and are
|
|
195
|
+
backfilled onto rows that originally arrived without one.
|
|
196
|
+
|
|
197
|
+
This satisfies every acceptance test, including both orders of mixed-mode.
|
|
198
|
+
Caveat: cross-source dedupe matches on the raw description, so it works when
|
|
199
|
+
both sources export the same description string (typical for OFX/SimpleFIN
|
|
200
|
+
from the same institution).
|
|
201
|
+
|
|
202
|
+
## Security invariants
|
|
203
|
+
|
|
204
|
+
- `data/`, `*.db`, `*.csv`, `*.ofx`, `*.qfx`, `.env`, `analysis/`, and
|
|
205
|
+
`*.ipynb` gitignored from the first commit; test fixtures and `demo`
|
|
206
|
+
data are fabricated only.
|
|
207
|
+
- Account numbers are never parsed: the OFX reader and SimpleFIN connector
|
|
208
|
+
drop `ACCTID`/`BANKID`-class fields at parse time. Only short labels
|
|
209
|
+
("US Checking") identify accounts. Asserted in `tests/test_security.py`.
|
|
210
|
+
- The model gets full transaction detail through `run_sql` — by design. What
|
|
211
|
+
it can never see is what the DB never contains: account numbers,
|
|
212
|
+
credentials, raw export files.
|
|
213
|
+
- `run_sql`: read-only connection (`mode=ro` URI), single-statement
|
|
214
|
+
SELECT/WITH only, keyword denylist, SQLite authorizer denying everything
|
|
215
|
+
but reads, 200-row cap, 5-second time limit, statement/result size limits.
|
|
216
|
+
Literals and comments are stripped before the keyword scan (a merchant
|
|
217
|
+
named "UPDATE" is not a false positive); the authorizer and read-only mode
|
|
218
|
+
are the real guards. Tested with hostile inputs.
|
|
219
|
+
- SimpleFIN access URL from `SIMPLEFIN_ACCESS_URL` or a `0600` config file
|
|
220
|
+
only — never the repo, the DB, or the LLM context. `https` is required,
|
|
221
|
+
HTTP redirects are refused (credentials are never replayed to another
|
|
222
|
+
host), and loose file permissions produce a warning.
|
|
223
|
+
- New database files are created owner-only (`0600`).
|
|
224
|
+
- `ANTHROPIC_API_KEY` from env only; LLM steps fail loudly without it,
|
|
225
|
+
everything else runs keyless.
|
|
226
|
+
|
|
227
|
+
## Tests
|
|
228
|
+
|
|
229
|
+
```sh
|
|
230
|
+
uv run pytest
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
The suite covers the acceptance checklist: mixed-mode dedupe in both
|
|
234
|
+
orders, quarantine of malformed rows, integer-cents math, per-currency
|
|
235
|
+
reporting, `run_sql` hardening against hostile inputs, recurring detection
|
|
236
|
+
with gap tolerance, the MCP tools, the demo seeder, and the security
|
|
237
|
+
invariants above.
|
|
238
|
+
|
|
239
|
+
## License
|
|
240
|
+
|
|
241
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Durable account metadata used to interpret financial activity."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
PURPOSES = ("personal", "business", "mixed", "unknown")
|
|
7
|
+
ANALYSIS_TREATMENTS = ("include", "monitor_only", "exclude")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def set_context(
|
|
11
|
+
conn: sqlite3.Connection,
|
|
12
|
+
account_name: str,
|
|
13
|
+
*,
|
|
14
|
+
purpose: str | None = None,
|
|
15
|
+
entity_name: str | None = None,
|
|
16
|
+
business_use_percent: int | None = None,
|
|
17
|
+
context_note: str | None = None,
|
|
18
|
+
analysis_treatment: str | None = None,
|
|
19
|
+
) -> dict[str, Any]:
|
|
20
|
+
"""Update interpretive metadata without changing bank-sourced fields."""
|
|
21
|
+
row = conn.execute("SELECT * FROM accounts WHERE name = ?", (account_name,)).fetchone()
|
|
22
|
+
if not row:
|
|
23
|
+
raise ValueError(f"unknown account: {account_name}")
|
|
24
|
+
if purpose is not None and purpose not in PURPOSES:
|
|
25
|
+
raise ValueError(f"purpose must be one of {', '.join(PURPOSES)}")
|
|
26
|
+
if business_use_percent is not None and not 0 <= business_use_percent <= 100:
|
|
27
|
+
raise ValueError("business_use_percent must be between 0 and 100")
|
|
28
|
+
if analysis_treatment is not None and analysis_treatment not in ANALYSIS_TREATMENTS:
|
|
29
|
+
raise ValueError(
|
|
30
|
+
f"analysis_treatment must be one of {', '.join(ANALYSIS_TREATMENTS)}"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
updates: dict[str, object | None] = {}
|
|
34
|
+
if purpose is not None:
|
|
35
|
+
updates["purpose"] = purpose
|
|
36
|
+
if business_use_percent is None:
|
|
37
|
+
if purpose == "personal":
|
|
38
|
+
updates["business_use_percent"] = 0
|
|
39
|
+
elif purpose == "business":
|
|
40
|
+
updates["business_use_percent"] = 100
|
|
41
|
+
elif purpose == "unknown":
|
|
42
|
+
updates["business_use_percent"] = None
|
|
43
|
+
if entity_name is not None:
|
|
44
|
+
updates["entity_name"] = entity_name.strip() or None
|
|
45
|
+
if business_use_percent is not None:
|
|
46
|
+
updates["business_use_percent"] = business_use_percent
|
|
47
|
+
if context_note is not None:
|
|
48
|
+
updates["context_note"] = context_note.strip() or None
|
|
49
|
+
if analysis_treatment is not None:
|
|
50
|
+
updates["analysis_treatment"] = analysis_treatment
|
|
51
|
+
if not updates:
|
|
52
|
+
raise ValueError("provide at least one account metadata field to update")
|
|
53
|
+
|
|
54
|
+
assignments = ", ".join(f"{column} = ?" for column in updates)
|
|
55
|
+
conn.execute(
|
|
56
|
+
f"UPDATE accounts SET {assignments} WHERE id = ?",
|
|
57
|
+
[*updates.values(), row["id"]],
|
|
58
|
+
)
|
|
59
|
+
conn.commit()
|
|
60
|
+
return dict(conn.execute("SELECT * FROM accounts WHERE id = ?", (row["id"],)).fetchone())
|