dinary 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dinary-0.0.1/.coveragerc +17 -0
- dinary-0.0.1/.github/.plans/architecture.md +646 -0
- dinary-0.0.1/.github/.plans/task.md +135 -0
- dinary-0.0.1/.github/workflows/ci.yml +132 -0
- dinary-0.0.1/.github/workflows/docs.yml +61 -0
- dinary-0.0.1/.github/workflows/pip_publish.yml +91 -0
- dinary-0.0.1/.github/workflows/static.yml +34 -0
- dinary-0.0.1/.gitignore +19 -0
- dinary-0.0.1/.pre-commit-config.yaml +74 -0
- dinary-0.0.1/LICENSE.txt +20 -0
- dinary-0.0.1/PKG-INFO +83 -0
- dinary-0.0.1/README.md +46 -0
- dinary-0.0.1/activate.sh +61 -0
- dinary-0.0.1/docs/includes/install_pipx_macos.sh +2 -0
- dinary-0.0.1/docs/mkdocs.yml +73 -0
- dinary-0.0.1/docs/src/en/images/about.jpg +0 -0
- dinary-0.0.1/docs/src/en/index.md +16 -0
- dinary-0.0.1/docs/src/en/installation.md +11 -0
- dinary-0.0.1/docs/src/en/reference.md +4 -0
- dinary-0.0.1/docs/src/ru/index.md +11 -0
- dinary-0.0.1/docs/src/ru/installation.md +11 -0
- dinary-0.0.1/invoke.yml +5 -0
- dinary-0.0.1/pyproject.toml +59 -0
- dinary-0.0.1/pytest.ini +2 -0
- dinary-0.0.1/scripts/__init__.py +0 -0
- dinary-0.0.1/scripts/build-docs.sh +29 -0
- dinary-0.0.1/scripts/build.sh +3 -0
- dinary-0.0.1/scripts/docs-render-config.sh +15 -0
- dinary-0.0.1/scripts/upload.sh +6 -0
- dinary-0.0.1/scripts/verup.sh +93 -0
- dinary-0.0.1/scripts/verup_action.sh +88 -0
- dinary-0.0.1/src/dinary/__about__.py +1 -0
- dinary-0.0.1/src/dinary/__init__.py +8 -0
- dinary-0.0.1/src/dinary/main.py +33 -0
- dinary-0.0.1/tasks.py +84 -0
- dinary-0.0.1/tests/test_dinary.py +14 -0
- dinary-0.0.1/uv.lock +1039 -0
dinary-0.0.1/.coveragerc
ADDED
|
@@ -0,0 +1,646 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
### Overview
|
|
4
|
+
|
|
5
|
+
A personal expense tracking system for a single user living in Serbia.
|
|
6
|
+
Receipts are entered via mobile (QR scan or manual), stored in a local database with item-level granularity, automatically categorized,
|
|
7
|
+
and analyzed through dashboards and AI-powered insights.
|
|
8
|
+
|
|
9
|
+
The system is designed to be built incrementally as a vibe-coding project by the user (an experienced developer),
|
|
10
|
+
prioritizing clean data model and scriptability over UI polish.
|
|
11
|
+
|
|
12
|
+
### Repositories
|
|
13
|
+
|
|
14
|
+
| Repository | Language | Role |
|
|
15
|
+
|---|---|-----------------------------------------------------------------------------------------------------------------------------------|
|
|
16
|
+
| **dinary** | Python (FastAPI + DuckDB) | Backend — REST API, data storage, rule-based classification, dashboards, Google Sheets sync. Manuals & configs to set up the frontend. |
|
|
17
|
+
| **dinary-analyst** | Rust | Local desktop tool — AI classification and spending analysis via `claude -p`, communicates with the dinary backend API (referred to as dinary-server in this document) |
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Data Layer: DuckDB, Partitioned by Year
|
|
22
|
+
|
|
23
|
+
### Why DuckDB
|
|
24
|
+
|
|
25
|
+
- Single-file embedded database, zero configuration, runs everywhere (laptop, VPS, Raspberry Pi).
|
|
26
|
+
- First-class analytical SQL: window functions, PIVOT/UNPIVOT, native Parquet/CSV/JSON import and export.
|
|
27
|
+
- ATTACH allows querying multiple year-files simultaneously for cross-year comparisons.
|
|
28
|
+
- Python-native: `import duckdb` — no server, no driver, no ORM needed.
|
|
29
|
+
- At the expected scale (~30K item rows/year), every query completes in milliseconds.
|
|
30
|
+
|
|
31
|
+
### Partitioning Strategy
|
|
32
|
+
|
|
33
|
+
One DuckDB file per year:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
data/
|
|
37
|
+
├── budget_2025.duckdb
|
|
38
|
+
├── budget_2026.duckdb
|
|
39
|
+
├── config.duckdb # categories, groups, stores, family, events, tags, rules — shared across years
|
|
40
|
+
└── archive/
|
|
41
|
+
└── budget_2024.duckdb
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Yearly files** contain transactional data (expenses, receipts, income).
|
|
45
|
+
|
|
46
|
+
**config.duckdb** contains classification metadata (categories, groups, stores, family members, events, tags, rules)
|
|
47
|
+
that is shared across all years and evolves independently of the transactional data.
|
|
48
|
+
|
|
49
|
+
Archiving a year = moving the file to `archive/`. Cross-year queries use ATTACH:
|
|
50
|
+
|
|
51
|
+
```sql
|
|
52
|
+
ATTACH 'data/budget_2025.duckdb' AS y2025;
|
|
53
|
+
ATTACH 'data/budget_2026.duckdb' AS y2026;
|
|
54
|
+
|
|
55
|
+
SELECT * FROM y2025.main.expenses
|
|
56
|
+
UNION ALL
|
|
57
|
+
SELECT * FROM y2026.main.expenses;
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Schema
|
|
61
|
+
|
|
62
|
+
#### config.duckdb — Classification & Reference Data
|
|
63
|
+
|
|
64
|
+
```sql
|
|
65
|
+
-- Category groups: high-level budget buckets for aggregated views.
|
|
66
|
+
-- Examples: здоровье, транспорт, жильё, питание, развлечения.
|
|
67
|
+
CREATE TABLE category_groups (
|
|
68
|
+
id INTEGER PRIMARY KEY,
|
|
69
|
+
name TEXT NOT NULL UNIQUE,
|
|
70
|
+
monthly_budget_eur DECIMAL(10,2) -- optional planned budget per month
|
|
71
|
+
);
|
|
72
|
+
|
|
73
|
+
-- Specific expense categories: what was bought.
|
|
74
|
+
-- Examples: фрукты, топливо, медицина, кафе, аренда.
|
|
75
|
+
-- Each category belongs to exactly one group (or none).
|
|
76
|
+
CREATE TABLE categories (
|
|
77
|
+
id INTEGER PRIMARY KEY,
|
|
78
|
+
name TEXT NOT NULL UNIQUE,
|
|
79
|
+
group_id INTEGER REFERENCES category_groups(id) -- nullable
|
|
80
|
+
);
|
|
81
|
+
|
|
82
|
+
-- Stores: normalized store names.
|
|
83
|
+
-- Useful for analytics: "how much do I spend at Maxi vs Lidl".
|
|
84
|
+
CREATE TABLE stores (
|
|
85
|
+
id INTEGER PRIMARY KEY,
|
|
86
|
+
name TEXT NOT NULL UNIQUE,
|
|
87
|
+
store_type TEXT -- 'supermarket' | 'pharmacy' | 'gas_station' | 'online' | etc.
|
|
88
|
+
);
|
|
89
|
+
|
|
90
|
+
-- Family members: who the expense is for (beneficiary).
|
|
91
|
+
-- Short fixed list. Default = 'семья' (whole family / unspecified).
|
|
92
|
+
CREATE TABLE family_members (
|
|
93
|
+
id INTEGER PRIMARY KEY,
|
|
94
|
+
name TEXT NOT NULL UNIQUE -- 'семья', 'Андрей', 'Лариса', 'Аня', 'собака'
|
|
95
|
+
);
|
|
96
|
+
|
|
97
|
+
-- Events: temporary situations with their own spending that should be trackable separately.
|
|
98
|
+
-- A trip, a renovation project, hosting guests, a camp for the child, etc.
|
|
99
|
+
-- Events have date ranges (informational, used for auto-suggestion at entry time)
|
|
100
|
+
-- and participants (which family members are involved).
|
|
101
|
+
-- Multiple events can overlap in time (e.g., parents' trip + child's camp).
|
|
102
|
+
CREATE TABLE events (
|
|
103
|
+
id INTEGER PRIMARY KEY,
|
|
104
|
+
name TEXT NOT NULL, -- "Босния Сараево+Мостар", "Дивчибаре", "Аня соревнования Гамбург"
|
|
105
|
+
date_from DATE NOT NULL,
|
|
106
|
+
date_to DATE NOT NULL,
|
|
107
|
+
is_active BOOLEAN DEFAULT true, -- false = archived, hidden from entry UI
|
|
108
|
+
comment TEXT
|
|
109
|
+
);
|
|
110
|
+
|
|
111
|
+
-- Event participants: which family members are part of each event.
|
|
112
|
+
CREATE TABLE event_members (
|
|
113
|
+
event_id INTEGER NOT NULL REFERENCES events(id),
|
|
114
|
+
member_id INTEGER NOT NULL REFERENCES family_members(id),
|
|
115
|
+
PRIMARY KEY (event_id, member_id)
|
|
116
|
+
);
|
|
117
|
+
|
|
118
|
+
-- Tags: small fixed set of flags for cross-cutting concerns.
|
|
119
|
+
-- NOT for things that are better modeled as category, group, beneficiary, or event.
|
|
120
|
+
-- Examples: 'релокация' (extra cost of living abroad), 'профессиональное' (work-related).
|
|
121
|
+
-- Expected count: 2-5, practically never grows.
|
|
122
|
+
CREATE TABLE tags (
|
|
123
|
+
id INTEGER PRIMARY KEY,
|
|
124
|
+
name TEXT NOT NULL UNIQUE
|
|
125
|
+
);
|
|
126
|
+
|
|
127
|
+
-- Pattern-based auto-classification of receipt line items.
|
|
128
|
+
-- Patterns are matched against item names from parsed receipts.
|
|
129
|
+
-- Priority: lower number = higher priority (checked first).
|
|
130
|
+
-- Note: for Serbian receipt items (e.g., "AJDARED 1KG", "JAB ZELENA"), rule-based matching
|
|
131
|
+
-- has limited effectiveness. Primary classification mechanism is AI (see Classification Layer).
|
|
132
|
+
-- Rules capture the easy wins and grow over time from confirmed AI suggestions.
|
|
133
|
+
CREATE TABLE category_rules (
|
|
134
|
+
id INTEGER PRIMARY KEY,
|
|
135
|
+
pattern TEXT NOT NULL, -- substring or regex matched against item name
|
|
136
|
+
category_id INTEGER NOT NULL REFERENCES categories(id),
|
|
137
|
+
priority INTEGER DEFAULT 100,
|
|
138
|
+
created_by TEXT DEFAULT 'manual' -- 'manual' | 'ai' — tracks rule origin
|
|
139
|
+
);
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
#### budget_YYYY.duckdb — Yearly Transactional Data
|
|
143
|
+
|
|
144
|
+
```sql
|
|
145
|
+
-- The single source of truth for all spending.
|
|
146
|
+
-- Every expense — whether parsed from a QR receipt, or manually entered (café, taxi, haircut) —
|
|
147
|
+
-- is a row in this table.
|
|
148
|
+
-- A manual expense is simply a row with no receipt_id and a hand-picked category.
|
|
149
|
+
--
|
|
150
|
+
-- Five orthogonal dimensions on each expense, each answering a different question:
|
|
151
|
+
-- category_id → what was bought (→ category_groups via categories.group_id)
|
|
152
|
+
-- beneficiary_id → for whom (family_members; default = 'семья')
|
|
153
|
+
-- event_id → within which event (events; nullable)
|
|
154
|
+
-- tags → cross-cutting flags (via expense_tags; e.g., релокация, профессиональное)
|
|
155
|
+
-- store_id → where purchased (stores; nullable)
|
|
156
|
+
CREATE TABLE expenses (
|
|
157
|
+
id TEXT PRIMARY KEY, -- UUID or ULID
|
|
158
|
+
datetime TIMESTAMP NOT NULL, -- when the purchase happened
|
|
159
|
+
name TEXT NOT NULL, -- item name (from receipt) or description (manual entry)
|
|
160
|
+
quantity DECIMAL(10,3), -- nullable for manual entries
|
|
161
|
+
unit_price DECIMAL(10,2), -- nullable for manual entries
|
|
162
|
+
amount DECIMAL(10,2) NOT NULL,-- total for this line: quantity × unit_price, or manual amount
|
|
163
|
+
currency TEXT DEFAULT 'RSD', -- ISO 4217: RSD, EUR, BAM, etc.
|
|
164
|
+
category_id INTEGER, -- FK to config.categories (nullable until classified)
|
|
165
|
+
beneficiary_id INTEGER, -- FK to config.family_members (nullable → defaults to 'семья')
|
|
166
|
+
event_id INTEGER, -- FK to config.events (nullable)
|
|
167
|
+
store_id INTEGER, -- FK to config.stores (nullable for non-store expenses)
|
|
168
|
+
receipt_id TEXT, -- FK to receipts.id (nullable; manual entries have none)
|
|
169
|
+
comment TEXT,
|
|
170
|
+
ai_category_suggestion TEXT, -- raw AI suggestion, stored for review
|
|
171
|
+
classification_status TEXT DEFAULT 'pending' -- 'pending' | 'auto' | 'ai_suggested' | 'confirmed'
|
|
172
|
+
);
|
|
173
|
+
|
|
174
|
+
-- Many-to-many: an expense can have multiple tags (e.g., both 'релокация' and 'профессиональное').
|
|
175
|
+
CREATE TABLE expense_tags (
|
|
176
|
+
expense_id TEXT NOT NULL REFERENCES expenses(id),
|
|
177
|
+
tag_id INTEGER NOT NULL, -- FK to config.tags (cross-db, so not declared as a constraint; enforced in app logic)
|
|
178
|
+
PRIMARY KEY (expense_id, tag_id)
|
|
179
|
+
);
|
|
180
|
+
|
|
181
|
+
-- Raw receipt archive. NOT used in analytical queries — no JOINs needed.
|
|
182
|
+
-- Exists purely to preserve the original data for reproducibility and debugging.
|
|
183
|
+
CREATE TABLE receipts (
|
|
184
|
+
id TEXT PRIMARY KEY,
|
|
185
|
+
datetime TIMESTAMP NOT NULL,
|
|
186
|
+
store_id INTEGER,
|
|
187
|
+
total DECIMAL(12,2),
|
|
188
|
+
currency TEXT DEFAULT 'RSD',
|
|
189
|
+
raw_url TEXT, -- SUF PURS URL from QR code
|
|
190
|
+
raw_html TEXT, -- cached HTML page from tax authority
|
|
191
|
+
created_at TIMESTAMP DEFAULT current_timestamp
|
|
192
|
+
);
|
|
193
|
+
|
|
194
|
+
CREATE TABLE income (
|
|
195
|
+
id TEXT PRIMARY KEY,
|
|
196
|
+
date DATE NOT NULL,
|
|
197
|
+
amount DECIMAL(12,2) NOT NULL,
|
|
198
|
+
currency TEXT DEFAULT 'RSD',
|
|
199
|
+
source TEXT DEFAULT 'salary', -- 'salary' | 'bonus' | 'freelance' | 'espp' | etc.
|
|
200
|
+
comment TEXT
|
|
201
|
+
);
|
|
202
|
+
|
|
203
|
+
CREATE TABLE exchange_rates (
|
|
204
|
+
currency TEXT NOT NULL, -- source currency (e.g., 'RSD')
|
|
205
|
+
target TEXT DEFAULT 'EUR',
|
|
206
|
+
rate DECIMAL(12,6) NOT NULL, -- 1 unit of currency = rate units of target
|
|
207
|
+
valid_from DATE NOT NULL,
|
|
208
|
+
valid_to DATE,
|
|
209
|
+
PRIMARY KEY (currency, target, valid_from)
|
|
210
|
+
);
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Five Orthogonal Dimensions
|
|
214
|
+
|
|
215
|
+
The current spreadsheet mixes several unrelated concepts in the "envelope" field: hierarchical grouping (здоровье = медицина + БАД + лекарства),
|
|
216
|
+
beneficiary (ребенок, лариса), temporary context (путешествия), expense purpose (профессиональное), and relocation overhead (релокация).
|
|
217
|
+
This leads to duplicated category rows and makes cross-cutting analysis impossible.
|
|
218
|
+
|
|
219
|
+
The new model separates five independent dimensions:
|
|
220
|
+
|
|
221
|
+
```
|
|
222
|
+
expense row
|
|
223
|
+
│
|
|
224
|
+
├── category_id ──→ categories ──→ category_groups WHAT (фрукты → питание)
|
|
225
|
+
│
|
|
226
|
+
├── beneficiary_id ──→ family_members FOR WHOM (Аня, Лариса, собака, семья)
|
|
227
|
+
│
|
|
228
|
+
├── event_id ──→ events ←── event_members WITHIN WHAT (поездка в Боснию, лагерь Ани)
|
|
229
|
+
│
|
|
230
|
+
├── expense_tags ──→ tags WHY SPECIAL (релокация, профессиональное)
|
|
231
|
+
│
|
|
232
|
+
└── store_id ──→ stores WHERE (Maxi, Lidl, онлайн)
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**"How much on fruit?"** → category = фрукты.
|
|
236
|
+
**"How much on health?"** → group = здоровье (медицина + БАД + лекарства + спорт).
|
|
237
|
+
**"How much for the child?"** → beneficiary = Аня.
|
|
238
|
+
**"How much on the Bosnia trip, and on what?"** → event = Босния, GROUP BY category.
|
|
239
|
+
**"How much on fruit during the Bosnia trip?"** → category = фрукты AND event = Босния.
|
|
240
|
+
**"All trips this year?"** → SELECT * FROM events WHERE date_from <= DATE '2026-12-31' AND date_to >= DATE '2026-01-01'.
|
|
241
|
+
**"All trips the child went on?"** → events JOIN event_members WHERE member = Аня.
|
|
242
|
+
**"How much does relocation cost me?"** → tag = релокация.
|
|
243
|
+
**"How much on professional subscriptions?"** → tag = профессиональное AND group = подписки.
|
|
244
|
+
|
|
245
|
+
No duplicated rows. Each dimension is independent and composable with any other.
|
|
246
|
+
|
|
247
|
+
### Event Auto-assignment at Entry Time
|
|
248
|
+
|
|
249
|
+
When a new expense is entered, the system checks if its date falls within any active event's date range:
|
|
250
|
+
|
|
251
|
+
- **Zero matching events** → event_id = NULL (no suggestion).
|
|
252
|
+
- **One matching event** → auto-assigned to that event. User sees it and can remove.
|
|
253
|
+
- **Multiple matching events** (overlapping dates) → dropdown list of matching events. User picks one, or none.
|
|
254
|
+
|
|
255
|
+
Manual override in both directions: assign an expense to an event outside its date range (fueling up before a trip),
|
|
256
|
+
or remove auto-assignment (a regular grocery run during a trip that shouldn't count).
|
|
257
|
+
|
|
258
|
+
### Key Design Decisions
|
|
259
|
+
|
|
260
|
+
**Raw data is immutable; classification is a layer on top.** Expenses store the original name and amount.
|
|
261
|
+
Category, beneficiary, event, and tags can be changed at any time without touching the expense's core data.
|
|
262
|
+
|
|
263
|
+
**Category group is derived, not stored on expenses.** An expense's group is resolved via `category_id → categories.group_id`.
|
|
264
|
+
Changing a category's group assignment instantly affects all historical data.
|
|
265
|
+
|
|
266
|
+
**Beneficiary has a default.** If `beneficiary_id` is NULL, it means "семья" (whole family / general).
|
|
267
|
+
This is the common case — only expenses specifically for one person need explicit assignment.
|
|
268
|
+
|
|
269
|
+
**Events are archivable.** Once a trip/event is over, `is_active = false` hides it from the entry UI dropdown but preserves all data.
|
|
270
|
+
Past events are accessible in analytics and can be reactivated if needed.
|
|
271
|
+
|
|
272
|
+
**Tags are a tiny fixed set.** Unlike events (which grow by ~5-10/year) or categories (which grow with QR parsing),
|
|
273
|
+
tags are 2-5 conceptual flags that practically never change.
|
|
274
|
+
They mark structural circumstances (relocation, professional use), not temporal events or beneficiaries.
|
|
275
|
+
|
|
276
|
+
**One table for all expenses.** A café bill for 500 RSD and a line item from a supermarket receipt are both rows in `expenses`.
|
|
277
|
+
The difference: the café entry has no `receipt_id`, no `quantity`, and was manually categorized at entry time.
|
|
278
|
+
|
|
279
|
+
**Receipts table is an archive, not a parent.** `receipts` stores raw HTML and URL for reproducibility.
|
|
280
|
+
It is never JOINed in analytical queries. All fields needed for analytics (datetime, store_id, etc.) are denormalized onto each expense row.
|
|
281
|
+
|
|
282
|
+
**Stores are normalized.** `store_id` on expenses, with a lookup table in config.duckdb.
|
|
283
|
+
The receipt parser maps variant spellings ("MAXI", "Maxi DOO", "MAXI SOMBOR") to a single store_id.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## Input Layer
|
|
288
|
+
|
|
289
|
+
### Receipt Scanning (Serbian Fiscal QR Codes)
|
|
290
|
+
|
|
291
|
+
Serbian fiscal receipts contain a QR code with a URL to `suf.purs.gov.rs`. The HTML page contains all line items with names, quantities, and prices.
|
|
292
|
+
|
|
293
|
+
**Existing open-source parsers:**
|
|
294
|
+
|
|
295
|
+
- [Innovigo/sr-invoice-parser](https://github.com/Innovigo/sr-invoice-parser) — Python library that crawls the SUF PURS page and extracts items as structured data (name, quantity, price, total_price). MIT license.
|
|
296
|
+
- [turanjanin/serbian-fiscal-receipts-parser](https://github.com/turanjanin/serbian-fiscal-receipts-parser) — PHP library for the same purpose.
|
|
297
|
+
|
|
298
|
+
**Flow:**
|
|
299
|
+
|
|
300
|
+
1. User scans QR code on phone → extracts URL.
|
|
301
|
+
2. URL is sent to the backend.
|
|
302
|
+
3. Backend fetches the HTML page from SUF PURS, parses line items.
|
|
303
|
+
4. Raw HTML is cached in `receipts.raw_html` for reproducibility.
|
|
304
|
+
5. Each line item is inserted into `expenses` with `classification_status = 'pending'`.
|
|
305
|
+
6. Category rules are applied immediately (pattern matching); matched items get `classification_status = 'auto'`.
|
|
306
|
+
7. Unmatched items remain `'pending'` for batch AI classification (see below).
|
|
307
|
+
|
|
308
|
+
### Manual Entry
|
|
309
|
+
|
|
310
|
+
For expenses without QR codes (cafés, services, cash payments, foreign purchases):
|
|
311
|
+
- User enters: amount, category (from list), optional comment.
|
|
312
|
+
- Stored in `expenses` — same table as parsed receipt items, just without `receipt_id`, `quantity`, or `unit_price`.
|
|
313
|
+
- Category is assigned at entry time (user picks from a list or types a shortcut).
|
|
314
|
+
|
|
315
|
+
### Mobile Input Interface (dinary-app)
|
|
316
|
+
|
|
317
|
+
The specific mobile client is a build-time decision.
|
|
318
|
+
The architecture is agnostic — the input layer is a thin client that sends structured data to the backend via a simple REST API.
|
|
319
|
+
Key functional requirements regardless of the chosen tool:
|
|
320
|
+
|
|
321
|
+
- Camera access for QR scanning.
|
|
322
|
+
- Fast manual entry: amount + category selector + optional comment, one tap to submit.
|
|
323
|
+
- Event selector: if the expense date falls within an active event's date range, auto-suggest it. If multiple active events overlap, show a dropdown. Allow manual assignment/removal.
|
|
324
|
+
- Beneficiary selector: defaults to "семья", quick switch to a specific family member.
|
|
325
|
+
- Confirmation screen after QR scan: shows parsed items, allows quick category corrections before saving.
|
|
326
|
+
|
|
327
|
+
#### Frontend Tool Evaluation (Phase 3 prerequisite)
|
|
328
|
+
|
|
329
|
+
Before building the mobile input layer, evaluate the candidate tools listed below **and research whether other tools exist** that may fit better.
|
|
330
|
+
The list is a starting point, not exhaustive — the no-code/low-code landscape changes rapidly and there may be newer or niche tools
|
|
331
|
+
that satisfy the requirements better than any of these.
|
|
332
|
+
|
|
333
|
+
Build a minimal MVP with the most promising 1-2 candidates to compare real-world UX before committing.
|
|
334
|
+
|
|
335
|
+
**Initial candidate list:**
|
|
336
|
+
|
|
337
|
+
| Tool | Type | Evaluate for |
|
|
338
|
+
|------|------|-------------|
|
|
339
|
+
| **Telegram Bot** | Chat-based UI | Lowest dev effort. Native camera for QR photo/URL sharing. Inline keyboards for category selection. No app install needed. Limitation: no true "form" UX — interaction is sequential, not a single screen. **Offline: does not work offline — requires internet for every interaction.** |
|
|
340
|
+
| **Glide Apps** | No-code app builder (Google Sheets/SQL backend) | Can it connect to a custom REST API or DuckDB directly? Does it support camera/QR scanning? Free tier limits? Good for rapid prototyping if it can talk to our backend. Check offline support. |
|
|
341
|
+
| **Retool** | Low-code internal tool builder | Strong on forms, tables, and API integration. Mobile-responsive. Free tier (5 users) is sufficient. Can it do QR scanning natively or via a component? Overkill for input-only, but could double as an admin/review UI for classifications. Check offline support — likely none. |
|
|
342
|
+
| **Appsmith** | Open-source Retool alternative | Self-hostable (important for data ownership). Same evaluation criteria as Retool. Check: mobile UX quality, QR scanning support, DuckDB/REST connectivity, offline mode. |
|
|
343
|
+
| **Appgyver (SAP Build Apps)** | No-code native app builder | Produces actual mobile apps. QR scanning is a built-in component. Free tier available. Evaluate: learning curve, API connectivity, ease of iteration. Has offline data storage capabilities. More effort than Telegram but better native UX. |
|
|
344
|
+
| **Tally / Typeform** | Form builders | Good for quick data capture. Tally is free and supports webhooks. Can a form-based flow work for receipt entry? Likely too rigid for the QR→review→confirm flow, but worth checking for manual entry only. No offline support. |
|
|
345
|
+
| **PWA (custom)** | Self-built Progressive Web App | Maximum control. Camera API for QR scanning (via `navigator.mediaDevices`). Full offline support via Service Workers + IndexedDB. Requires actual frontend development. Best long-term option if no-code tools don't fit. Works on both Android and iOS via browser. |
|
|
346
|
+
|
|
347
|
+
**Evaluation criteria:**
|
|
348
|
+
|
|
349
|
+
Must-have (tool is disqualified if it fails any of these):
|
|
350
|
+
|
|
351
|
+
0. **No mobile app to publish** — avoid building a custom app that has to be signed and submitted to Apple / Google for review.
|
|
352
|
+
1. **Offline operation with guaranteed data persistence** — the app must work without internet. Entered data must be stored locally on the device and synced to the backend when connectivity is restored. Data loss due to network unavailability is unacceptable — this is the primary data entry point.
|
|
353
|
+
2. **Cross-platform: Android & iOS** — must work on both platforms (native app, PWA, or responsive web).
|
|
354
|
+
3. **API connectivity** — must be able to POST structured data to a custom REST endpoint.
|
|
355
|
+
4. **Free for expected load** — sustainable at zero cost for a single user with 10-20 entries/day. No "free trial" that expires.
|
|
356
|
+
5. **Longevity / sustainability** — the tool must have a credible future. For open-source: sufficient community (contributors, stars, release cadence). For commercial: a clear business model and track record suggesting the free tier won't be killed. Tools that have recently been acquired, pivoted, or deprecated their free tier are high-risk.
|
|
357
|
+
|
|
358
|
+
Important:
|
|
359
|
+
6. **QR scanning** — can the tool scan a QR code and extract the URL? (must-have for Phase 3b, not required for MVP)
|
|
360
|
+
7. **Speed of entry** — how many taps/screens for a manual expense? (critical for daily use adoption)
|
|
361
|
+
8. **Dev effort for MVP** — how fast can a working prototype be built?
|
|
362
|
+
|
|
363
|
+
Nice-to-have:
|
|
364
|
+
9. **Self-hostable / data ownership** — does data pass through third-party servers?
|
|
365
|
+
10. **Extensibility** — can it grow into the review/classification UI later?
|
|
366
|
+
|
|
367
|
+
---
|
|
368
|
+
|
|
369
|
+
## Classification Layer
|
|
370
|
+
|
|
371
|
+
### Three-tier Classification
|
|
372
|
+
|
|
373
|
+
**Tier 1: Rule-based (instant, free).** `category_rules` table contains patterns (substrings or regexes) matched against item names.
|
|
374
|
+
Example: pattern `MLEKO` matches category "Dairy", pattern `SREDSTVO ZA` matches "Household chemicals".
|
|
375
|
+
Rules are applied immediately when items are ingested. This handles the majority of repeat purchases after an initial learning period.
|
|
376
|
+
|
|
377
|
+
**Tier 2: AI batch classification (deferred, economical).** Unclassified items (`classification_status = 'pending'`) accumulate on dinary-server throughout the day.
|
|
378
|
+
When the user runs dinary-analyst (manually or via scheduler), it fetches pending items from the server API and classifies them using `claude -p`:
|
|
379
|
+
|
|
380
|
+
```bash
|
|
381
|
+
# dinary-analyst fetches pending items from dinary-server
|
|
382
|
+
dinary-analyst classify
|
|
383
|
+
|
|
384
|
+
# Under the hood:
|
|
385
|
+
# 1. GET https://server/api/tasks/pending-classifications
|
|
386
|
+
# 2. Feeds items to claude -p with category list and classification prompt
|
|
387
|
+
# 3. POST https://server/api/tasks/classifications with results
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
This runs on the user's laptop under the existing Claude subscription via `claude -p` (Claude Code CLI, non-interactive mode). No API costs.
|
|
391
|
+
Typical batch: 20-50 items, easily fits in a single prompt. dinary-server applies the results to DuckDB.
|
|
392
|
+
|
|
393
|
+
**Tier 3: Manual confirmation.** AI suggestions are stored as `ai_category_suggestion` and `classification_status = 'ai_suggested'`. The user reviews and confirms (or corrects) via the dashboard or a CLI script. Confirmed classifications can optionally generate new rules in `category_rules` (with `created_by = 'ai'`), so similar items are auto-classified in the future.
|
|
394
|
+
|
|
395
|
+
### Rule Learning Loop
|
|
396
|
+
|
|
397
|
+
```
|
|
398
|
+
New item → Rule match? → YES → auto-classify, done
|
|
399
|
+
→ NO → mark 'pending'
|
|
400
|
+
→ AI batch suggests category + rule
|
|
401
|
+
→ User confirms/corrects
|
|
402
|
+
→ New rule added to category_rules
|
|
403
|
+
→ Next time this item appears → auto-classified
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
Over time, the rule table grows and the AI batch shrinks. After a few months, most items are auto-classified; AI handles only genuinely new products.
|
|
407
|
+
|
|
408
|
+
---
|
|
409
|
+
|
|
410
|
+
## Analytics Layer
|
|
411
|
+
|
|
412
|
+
### Operational Dashboard
|
|
413
|
+
|
|
414
|
+
**Purpose:** "How am I doing this month?" — quick glance on the phone.
|
|
415
|
+
|
|
416
|
+
**Content:**
|
|
417
|
+
- Total spent this month vs. total income.
|
|
418
|
+
- Savings rate: (income − expenses) / income.
|
|
419
|
+
- Spending by category group with budget progress bars (if budgets are set).
|
|
420
|
+
- Comparison with same month last year and previous month.
|
|
421
|
+
- List of recent unclassified items (items needing attention).
|
|
422
|
+
|
|
423
|
+
**Implementation:** A static HTML page generated from DuckDB by a Python script. Served locally or via a lightweight HTTP server on a VPS. Regenerated after each new receipt or on a schedule (e.g., hourly). No JavaScript framework needed — HTML + CSS + inline SVG for progress bars, or minimal Chart.js.
|
|
424
|
+
|
|
425
|
+
### Analytical Dashboard
|
|
426
|
+
|
|
427
|
+
**Purpose:** "What happened over the past 6 months, and why?"
|
|
428
|
+
|
|
429
|
+
**Content:**
|
|
430
|
+
- Selectable time range (month, quarter, year, custom).
|
|
431
|
+
- Breakdown by category, group, store, beneficiary, event, tag — switchable views.
|
|
432
|
+
- Trend charts: monthly spending per category/group over time.
|
|
433
|
+
- Year-over-year comparison: selected period vs. same period previous year.
|
|
434
|
+
- Top-N items by total spend (item-level drill-down from parsed receipts).
|
|
435
|
+
- Seasonality detection (are there recurring monthly spikes?).
|
|
436
|
+
|
|
437
|
+
**Implementation:** An interactive single-page app (React/vanilla JS + Chart.js/Recharts).
|
|
438
|
+
Data is pre-aggregated by a Python script into a JSON file that the SPA loads. For ad-hoc queries, the user can also run SQL directly against DuckDB.
|
|
439
|
+
The dashboard is a view layer, not a data entry point.
|
|
440
|
+
|
|
441
|
+
### AI Analysis
|
|
442
|
+
|
|
443
|
+
**Purpose:** "What should I pay attention to? What can I optimize?"
|
|
444
|
+
|
|
445
|
+
**Trigger:** On demand, when the user runs dinary-analyst. Not automated — the user decides when to run it.
|
|
446
|
+
|
|
447
|
+
**Flow:**
|
|
448
|
+
1. dinary-analyst fetches aggregated data from dinary-server:
|
|
449
|
+
```bash
|
|
450
|
+
dinary-analyst analyze --period 2026-Q1
|
|
451
|
+
```
|
|
452
|
+
2. Under the hood: fetches data from server API, feeds to `claude -p`, pushes the report back to dinary-server.
|
|
453
|
+
3. The report is stored on the server and optionally displayed in the dashboard.
|
|
454
|
+
|
|
455
|
+
**Cost:** Zero beyond the existing Claude subscription. A quarterly analysis is ~2-3K tokens of input data + prompt — trivial.
|
|
456
|
+
|
|
457
|
+
---
|
|
458
|
+
|
|
459
|
+
## Export Layer: Google Sheets Sync
|
|
460
|
+
|
|
461
|
+
The existing Google Sheets spreadsheet continues to work as a familiar view.
|
|
462
|
+
A Python script (using `gspread` or Google Sheets API directly) runs on demand or on a schedule:
|
|
463
|
+
|
|
464
|
+
1. Queries DuckDB for monthly aggregates by category and group.
|
|
465
|
+
2. Writes the data into the existing sheet format (months as columns, categories as rows).
|
|
466
|
+
3. Updates the income and savings rows.
|
|
467
|
+
|
|
468
|
+
This is a **write-only, one-directional sync**: DuckDB → Google Sheets.
|
|
469
|
+
The spreadsheet becomes a read-only view; all data entry happens through the new system.
|
|
470
|
+
The sync script is idempotent — running it twice produces the same result.
|
|
471
|
+
|
|
472
|
+
---
|
|
473
|
+
|
|
474
|
+
## Deployment: Split Architecture (Backend + Local Agent)
|
|
475
|
+
|
|
476
|
+
### Design Principle
|
|
477
|
+
|
|
478
|
+
The system is split into two parts: an always-on **backend** (VPS) that handles data ingestion and serves dashboards, and a **local agent**
|
|
479
|
+
(user's laptop) that runs expensive AI tasks using the existing Claude subscription via `claude -p`.
|
|
480
|
+
The backend owns the single source of truth (DuckDB).
|
|
481
|
+
The local agent is stateless — it fetches tasks, processes them, and pushes results back.
|
|
482
|
+
|
|
483
|
+
```
|
|
484
|
+
┌──────────────┐ ┌─────────────────────────────────────┐
|
|
485
|
+
│ dinary-app │────────▶│ dinary (VPS) │
|
|
486
|
+
│ (mobile) │ │ │
|
|
487
|
+
│ │◀────────│ FastAPI + DuckDB │
|
|
488
|
+
└──────────────┘ │ - receives expenses from mobile │
|
|
489
|
+
│ - rule-based classification (Tier 1)│
|
|
490
|
+
│ - serves operational dashboard │
|
|
491
|
+
│ - serves analytical dashboard │
|
|
492
|
+
│ - exposes task queue API │
|
|
493
|
+
│ - Google Sheets sync │
|
|
494
|
+
└──────────────┬──────────────────────┘
|
|
495
|
+
│
|
|
496
|
+
task queue API (REST)
|
|
497
|
+
│
|
|
498
|
+
┌──────────────▼──────────────────────┐
|
|
499
|
+
│ dinary-analyst (user's laptop) │
|
|
500
|
+
│ │
|
|
501
|
+
│ Rust binary + claude -p │
|
|
502
|
+
│ - fetch pending classification tasks│
|
|
503
|
+
│ - AI batch classify (claude -p) │
|
|
504
|
+
│ - AI spending analysis (claude -p) │
|
|
505
|
+
│ - push results back to server API │
|
|
506
|
+
│ - any future AI-heavy tasks │
|
|
507
|
+
└─────────────────────────────────────┘
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
### dinary-server (VPS)
|
|
511
|
+
|
|
512
|
+
**What it does:**
|
|
513
|
+
- Accepts expenses from dinary-app (REST API).
|
|
514
|
+
- Stores everything in DuckDB (config.duckdb + yearly budget files).
|
|
515
|
+
- Applies Tier 1 classification (rule-based pattern matching) immediately on ingestion.
|
|
516
|
+
- Serves operational and analytical dashboards (static HTML or SPA).
|
|
517
|
+
- Syncs aggregated data to Google Sheets on schedule or on demand.
|
|
518
|
+
- Exposes a **task queue API** for the local agent:
|
|
519
|
+
- `GET /api/tasks/pending-classifications` — returns unclassified items as JSON.
|
|
520
|
+
- `POST /api/tasks/classifications` — accepts classification results, updates DuckDB.
|
|
521
|
+
- `GET /api/tasks/analysis-export?period=2026-Q1` — returns aggregated data for AI analysis.
|
|
522
|
+
- `POST /api/tasks/analysis-report` — stores the AI-generated report.
|
|
523
|
+
|
|
524
|
+
**What it does NOT do:**
|
|
525
|
+
- Any AI/LLM calls. All AI work is delegated to dinary-analyst.
|
|
526
|
+
|
|
527
|
+
**Hosting:** Oracle Cloud Free Tier (ARM VM, 4 cores, 24 GB RAM — permanently free). Alternative: any cheap VPS, or even a Raspberry Pi at home with Cloudflare Tunnel for external access.
|
|
528
|
+
|
|
529
|
+
**Accessibility:** Dashboard and API served via Cloudflare Tunnel (free, no public IP needed) or directly from the VPS.
|
|
530
|
+
|
|
531
|
+
### dinary-analyst (User's Laptop)
|
|
532
|
+
|
|
533
|
+
**What it does:**
|
|
534
|
+
- Runs on demand (manually or via scheduler) when the user is at the computer.
|
|
535
|
+
- Fetches pending tasks from the dinary-server API.
|
|
536
|
+
- Processes them using `claude -p` (Claude Code CLI, non-interactive mode) under the user's existing subscription — no API token costs.
|
|
537
|
+
- Pushes results back to the dinary-server API.
|
|
538
|
+
|
|
539
|
+
**Task types:**
|
|
540
|
+
|
|
541
|
+
1. **Batch classification** (daily or on demand):
|
|
542
|
+
```bash
|
|
543
|
+
# Fetch unclassified items from dinary-server, classify them, and push the results back
|
|
544
|
+
dinary-analyst classify
|
|
545
|
+
|
|
546
|
+
# Under the hood:
|
|
547
|
+
# 1. GET https://server/api/tasks/pending-classifications → pending.json
|
|
548
|
+
# 2. claude -p "classify these items..." → results.json
|
|
549
|
+
# 3. POST https://server/api/tasks/classifications ← results.json
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
2. **Spending analysis** (weekly/monthly/on demand):
|
|
553
|
+
```bash
|
|
554
|
+
dinary-analyst analyze --period 2026-Q1
|
|
555
|
+
|
|
556
|
+
# Under the hood:
|
|
557
|
+
# 1. GET https://server/api/tasks/analysis-export?period=2026-Q1 → data.json
|
|
558
|
+
# 2. claude -p "analyze this spending data..." → report.md
|
|
559
|
+
# 3. POST https://server/api/tasks/analysis-report ← report
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
3. **Future AI tasks** — any new AI-intensive operation follows the same pattern: dinary-server exposes a task endpoint, dinary-analyst fetches, processes with `claude -p`, pushes results back.
|
|
563
|
+
|
|
564
|
+
**Built as a Rust binary** — single executable, no runtime dependencies other than the `claude` CLI, compact installer for macOS and Windows.
|
|
565
|
+
|
|
566
|
+
### Backup Strategy
|
|
567
|
+
|
|
568
|
+
- DuckDB files on the VPS (dinary-server) are the primary copy.
|
|
569
|
+
- Periodic backup to user's laptop: `rsync` or `scp` of DuckDB files.
|
|
570
|
+
- Periodic Parquet export for maximum portability: `COPY expenses TO 'expenses_2026.parquet' (FORMAT parquet);`
|
|
571
|
+
- Git for the codebase (scripts, config). Data files excluded from git, backed up separately.
|
|
572
|
+
|
|
573
|
+
### Security
|
|
574
|
+
|
|
575
|
+
- The dinary-server API is protected by an API key or mutual TLS (single user, so a full auth system is not needed).
|
|
576
|
+
- Cloudflare Tunnel provides HTTPS without exposing the VPS directly.
|
|
577
|
+
- DuckDB files are not accessible from the internet — only through the dinary-server API.
|
|
578
|
+
|
|
579
|
+
---
|
|
580
|
+
|
|
581
|
+
## Build Plan (Incremental Phases)
|
|
582
|
+
|
|
583
|
+
### Phase 0: MVP — Manual Entry → Google Sheets (no DuckDB, no QR, no AI)
|
|
584
|
+
|
|
585
|
+
The fastest path to replacing manual spreadsheet editing. No new database, no receipt parsing — just a mobile frontend that writes directly to the existing Google Sheets structure.
|
|
586
|
+
|
|
587
|
+
**Scope:**
|
|
588
|
+
- A mobile frontend (chosen from the evaluation table, or a quick Telegram bot / PWA prototype) with a simple form: amount (RSD) + category (dropdown from the existing ~33 categories) + category group (auto-filled from category) + optional comment.
|
|
589
|
+
- A lightweight backend (Python script or serverless function) that receives the entry and writes it to the existing Google Sheets spreadsheet via the Sheets API.
|
|
590
|
+
- **Auto-month creation:** if the backend detects that rows for the current month don't exist yet in the sheet, it automatically creates the full block of category rows for the new month (copying the category/group structure from the previous month). This eliminates the most tedious manual step.
|
|
591
|
+
- Currency conversion: RSD → EUR using the same fixed rate currently used in the sheet.
|
|
592
|
+
- No item-level parsing, no DuckDB, no AI. The user picks the category manually, just as they do now — but from a phone instead of editing a spreadsheet.
|
|
593
|
+
|
|
594
|
+
**What this validates:**
|
|
595
|
+
- The chosen mobile frontend tool works for daily data entry (offline persistence, speed, UX).
|
|
596
|
+
- The Google Sheets API integration is reliable.
|
|
597
|
+
- The user actually adopts phone-based entry over direct spreadsheet editing.
|
|
598
|
+
|
|
599
|
+
**Exit criteria for Phase 0:** the user has used the system daily for 2+ weeks and no longer opens the spreadsheet to enter data manually.
|
|
600
|
+
|
|
601
|
+
### Phase 1: Data Foundation & Backend Deployment (dinary-server)
|
|
602
|
+
- Set up DuckDB schema (config.duckdb + budget_2026.duckdb) on VPS (Oracle Cloud Free Tier).
|
|
603
|
+
- Deploy dinary-server (FastAPI) with basic REST API for expense ingestion.
|
|
604
|
+
- Migrate existing Google Sheets data into DuckDB.
|
|
605
|
+
- Write basic SQL queries for monthly aggregates.
|
|
606
|
+
- Backend now writes to both DuckDB (primary) and Google Sheets (view layer).
|
|
607
|
+
- Set up Cloudflare Tunnel or direct HTTPS access to the backend.
|
|
608
|
+
|
|
609
|
+
### Phase 2: Receipt Parser
|
|
610
|
+
- Integrate or adapt sr-invoice-parser for fetching and parsing Serbian fiscal receipts from SUF PURS URLs.
|
|
611
|
+
- Build the ingestion pipeline: URL → fetch HTML → parse line items → insert into `expenses` table in DuckDB.
|
|
612
|
+
- Implement rule-based auto-classification.
|
|
613
|
+
|
|
614
|
+
### Phase 3: Mobile Input — Full Version (dinary-app)
|
|
615
|
+
- **3a: Frontend tool evaluation.** Research the candidate tools from the evaluation table (see "Frontend Tool Evaluation" section) **and any other tools discovered during research**. Build a minimal MVP (scan QR → send URL → see parsed items) with 1-2 top candidates. Compare: QR scanning reliability, offline data persistence, speed of manual entry, API connectivity, cross-platform behavior (Android + iOS), overall UX on phone. Decide on the tool. Note: if the Phase 0 tool already satisfies all must-have criteria, this step may be a confirmation rather than a new evaluation.
|
|
616
|
+
- **3b: Build the full mobile input layer** with the chosen tool.
|
|
617
|
+
- QR scan → send URL → parse → store.
|
|
618
|
+
- Manual entry for non-QR expenses.
|
|
619
|
+
- Event auto-suggestion and selection.
|
|
620
|
+
- Beneficiary selector.
|
|
621
|
+
- Offline queue with sync-on-reconnect.
|
|
622
|
+
|
|
623
|
+
### Phase 4: AI Classification (dinary-analyst)
|
|
624
|
+
- Build dinary-analyst as a Rust CLI binary.
|
|
625
|
+
- Implement the task queue API on dinary-server (`/api/tasks/*`).
|
|
626
|
+
- Build the batch classification flow: fetch pending → `claude -p` → push results.
|
|
627
|
+
- Implement the review/confirm flow (via dashboard or CLI).
|
|
628
|
+
- Wire up rule learning (confirmed classifications → new rules in `category_rules`).
|
|
629
|
+
|
|
630
|
+
### Phase 5: Dashboards (dinary-server)
|
|
631
|
+
- Operational dashboard (static HTML, current month snapshot).
|
|
632
|
+
- Analytical dashboard (interactive SPA with time range selector and breakdowns).
|
|
633
|
+
|
|
634
|
+
### Phase 6: AI Analysis & Google Sheets Sync (dinary-analyst + dinary-server)
|
|
635
|
+
- Add analysis export endpoint to dinary-server API.
|
|
636
|
+
- Build the dinary-analyst analysis flow: fetch aggregates → `claude -p` → push report.
|
|
637
|
+
- Build the Google Sheets sync script on dinary-server (if not already done in Phase 1).
|
|
638
|
+
- Set up scheduled runs on the VPS (sync, dashboard regeneration).
|
|
639
|
+
|
|
640
|
+
Each phase is independently useful:
|
|
641
|
+
|
|
642
|
+
- Phase 0 alone eliminates manual spreadsheet editing and validates the mobile input tool.
|
|
643
|
+
- Phase 1 establishes the proper data foundation.
|
|
644
|
+
- Phase 2 solves the supermarket opacity problem.
|
|
645
|
+
- Phase 3 adds QR scanning and full offline support.
|
|
646
|
+
- Phases 4-6 add intelligence and convenience.
|